mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Upgrading the bundled PCRE to 8.34
This commit is contained in:
@ -8576,7 +8576,7 @@ char *re_eprint(int err)
|
|||||||
{
|
{
|
||||||
static char epbuf[100];
|
static char epbuf[100];
|
||||||
size_t len __attribute__((unused))=
|
size_t len __attribute__((unused))=
|
||||||
regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
|
regerror(err, (regex_t *)NULL, epbuf, sizeof(epbuf));
|
||||||
assert(len <= sizeof(epbuf));
|
assert(len <= sizeof(epbuf));
|
||||||
return(epbuf);
|
return(epbuf);
|
||||||
}
|
}
|
||||||
|
@ -61,6 +61,10 @@
|
|||||||
# 2012-09-08 ChPe added PCRE32 support
|
# 2012-09-08 ChPe added PCRE32 support
|
||||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||||
|
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||||
|
# so it has been removed.
|
||||||
|
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||||
|
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||||
|
|
||||||
PROJECT(PCRE C CXX)
|
PROJECT(PCRE C CXX)
|
||||||
|
|
||||||
@ -107,6 +111,9 @@ CHECK_TYPE_SIZE("unsigned long long" UNSIGNED_LONG_LONG)
|
|||||||
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
||||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||||
|
|
||||||
|
SET(PCRE_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||||
|
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||||
|
|
||||||
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
||||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||||
|
|
||||||
@ -322,6 +329,15 @@ TARGET_LINK_LIBRARIES(pcreposix pcre)
|
|||||||
|
|
||||||
# Executables
|
# Executables
|
||||||
|
|
||||||
|
# Removed by PH (2008-01-23) because pcredemo shouldn't really be built
|
||||||
|
# automatically, and it gave trouble in some environments anyway.
|
||||||
|
# ADD_EXECUTABLE(pcredemo pcredemo.c)
|
||||||
|
# TARGET_LINK_LIBRARIES(pcredemo pcreposix)
|
||||||
|
# IF(NOT BUILD_SHARED_LIBS)
|
||||||
|
# # make sure to not use declspec(dllimport) in static mode on windows
|
||||||
|
# SET_TARGET_PROPERTIES(pcredemo PROPERTIES COMPILE_FLAGS "-DPCRE_STATIC")
|
||||||
|
# ENDIF(NOT BUILD_SHARED_LIBS)
|
||||||
|
|
||||||
IF(PCRE_BUILD_PCREGREP)
|
IF(PCRE_BUILD_PCREGREP)
|
||||||
ADD_EXECUTABLE(pcregrep pcregrep.c)
|
ADD_EXECUTABLE(pcregrep pcregrep.c)
|
||||||
SET(targets ${targets} pcregrep)
|
SET(targets ${targets} pcregrep)
|
||||||
@ -469,17 +485,25 @@ IF(PCRE_SHOW_REPORT)
|
|||||||
MESSAGE(STATUS "")
|
MESSAGE(STATUS "")
|
||||||
MESSAGE(STATUS "PCRE configuration summary:")
|
MESSAGE(STATUS "PCRE configuration summary:")
|
||||||
MESSAGE(STATUS "")
|
MESSAGE(STATUS "")
|
||||||
|
# MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
|
||||||
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
||||||
|
# MESSAGE(STATUS " C++ compiler .................... : ${CMAKE_CXX_COMPILER}")
|
||||||
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
|
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
|
||||||
# MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE_SUPPORT_JIT}")
|
# MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE_SUPPORT_JIT}")
|
||||||
MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
|
MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
|
||||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}")
|
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}")
|
||||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
|
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
|
||||||
|
# MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE_EBCDIC}")
|
||||||
|
# MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE_EBCDIC_NL25}")
|
||||||
|
# MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}")
|
||||||
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
||||||
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
||||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
|
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
|
||||||
|
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE_PARENS_NEST_LIMIT}")
|
||||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
|
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
|
||||||
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
|
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
|
||||||
|
# MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||||
|
# MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
|
||||||
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
|
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
|
||||||
# MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
|
# MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
|
||||||
MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
|
MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
|
||||||
|
210
pcre/ChangeLog
210
pcre/ChangeLog
@ -1,8 +1,216 @@
|
|||||||
ChangeLog for PCRE
|
ChangeLog for PCRE
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
Version 8.34 15-December-2013
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
1. Add pcre[16|32]_jit_free_unused_memory to forcibly free unused JIT
|
||||||
|
executable memory. Patch inspired by Carsten Klein.
|
||||||
|
|
||||||
|
2. ./configure --enable-coverage defined SUPPORT_GCOV in config.h, although
|
||||||
|
this macro is never tested and has no effect, because the work to support
|
||||||
|
coverage involves only compiling and linking options and special targets in
|
||||||
|
the Makefile. The comment in config.h implied that defining the macro would
|
||||||
|
enable coverage support, which is totally false. There was also support for
|
||||||
|
setting this macro in the CMake files (my fault, I just copied it from
|
||||||
|
configure). SUPPORT_GCOV has now been removed.
|
||||||
|
|
||||||
|
3. Make a small performance improvement in strlen16() and strlen32() in
|
||||||
|
pcretest.
|
||||||
|
|
||||||
|
4. Change 36 for 8.33 left some unreachable statements in pcre_exec.c,
|
||||||
|
detected by the Solaris compiler (gcc doesn't seem to be able to diagnose
|
||||||
|
these cases). There was also one in pcretest.c.
|
||||||
|
|
||||||
|
5. Cleaned up a "may be uninitialized" compiler warning in pcre_exec.c.
|
||||||
|
|
||||||
|
6. In UTF mode, the code for checking whether a group could match an empty
|
||||||
|
string (which is used for indefinitely repeated groups to allow for
|
||||||
|
breaking an infinite loop) was broken when the group contained a repeated
|
||||||
|
negated single-character class with a character that occupied more than one
|
||||||
|
data item and had a minimum repetition of zero (for example, [^\x{100}]* in
|
||||||
|
UTF-8 mode). The effect was undefined: the group might or might not be
|
||||||
|
deemed as matching an empty string, or the program might have crashed.
|
||||||
|
|
||||||
|
7. The code for checking whether a group could match an empty string was not
|
||||||
|
recognizing that \h, \H, \v, \V, and \R must match a character.
|
||||||
|
|
||||||
|
8. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||||
|
an empty string. If it can, pcretest shows this in its information output.
|
||||||
|
|
||||||
|
9. Fixed two related bugs that applied to Unicode extended grapheme clusters
|
||||||
|
that were repeated with a maximizing qualifier (e.g. \X* or \X{2,5}) when
|
||||||
|
matched by pcre_exec() without using JIT:
|
||||||
|
|
||||||
|
(a) If the rest of the pattern did not match after a maximal run of
|
||||||
|
grapheme clusters, the code for backing up to try with fewer of them
|
||||||
|
did not always back up over a full grapheme when characters that do not
|
||||||
|
have the modifier quality were involved, e.g. Hangul syllables.
|
||||||
|
|
||||||
|
(b) If the match point in a subject started with a modifier character, and
|
||||||
|
there was no match, the code could incorrectly back up beyond the match
|
||||||
|
point, and potentially beyond the first character in the subject,
|
||||||
|
leading to a segfault or an incorrect match result.
|
||||||
|
|
||||||
|
10. A conditional group with an assertion condition could lead to PCRE
|
||||||
|
recording an incorrect first data item for a match if no other first data
|
||||||
|
item was recorded. For example, the pattern (?(?=ab)ab) recorded "a" as a
|
||||||
|
first data item, and therefore matched "ca" after "c" instead of at the
|
||||||
|
start.
|
||||||
|
|
||||||
|
11. Change 40 for 8.33 (allowing pcregrep to find empty strings) showed up a
|
||||||
|
bug that caused the command "echo a | ./pcregrep -M '|a'" to loop.
|
||||||
|
|
||||||
|
12. The source of pcregrep now includes z/OS-specific code so that it can be
|
||||||
|
compiled for z/OS as part of the special z/OS distribution.
|
||||||
|
|
||||||
|
13. Added the -T and -TM options to pcretest.
|
||||||
|
|
||||||
|
14. The code in pcre_compile.c for creating the table of named capturing groups
|
||||||
|
has been refactored. Instead of creating the table dynamically during the
|
||||||
|
actual compiling pass, the information is remembered during the pre-compile
|
||||||
|
pass (on the stack unless there are more than 20 named groups, in which
|
||||||
|
case malloc() is used) and the whole table is created before the actual
|
||||||
|
compile happens. This has simplified the code (it is now nearly 150 lines
|
||||||
|
shorter) and prepared the way for better handling of references to groups
|
||||||
|
with duplicate names.
|
||||||
|
|
||||||
|
15. A back reference to a named subpattern when there is more than one of the
|
||||||
|
same name now checks them in the order in which they appear in the pattern.
|
||||||
|
The first one that is set is used for the reference. Previously only the
|
||||||
|
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||||
|
|
||||||
|
16. Unicode character properties were updated from Unicode 6.3.0.
|
||||||
|
|
||||||
|
17. The compile-time code for auto-possessification has been refactored, based
|
||||||
|
on a patch by Zoltan Herczeg. It now happens after instead of during
|
||||||
|
compilation. The code is cleaner, and more cases are handled. The option
|
||||||
|
PCRE_NO_AUTO_POSSESS is added for testing purposes, and the -O and /O
|
||||||
|
options in pcretest are provided to set it. It can also be set by
|
||||||
|
(*NO_AUTO_POSSESS) at the start of a pattern.
|
||||||
|
|
||||||
|
18. The character VT has been added to the default ("C" locale) set of
|
||||||
|
characters that match \s and are generally treated as white space,
|
||||||
|
following this same change in Perl 5.18. There is now no difference between
|
||||||
|
"Perl space" and "POSIX space". Whether VT is treated as white space in
|
||||||
|
other locales depends on the locale.
|
||||||
|
|
||||||
|
19. The code for checking named groups as conditions, either for being set or
|
||||||
|
for being recursed, has been refactored (this is related to 14 and 15
|
||||||
|
above). Processing unduplicated named groups should now be as fast as
|
||||||
|
numerical groups, and processing duplicated groups should be faster than
|
||||||
|
before.
|
||||||
|
|
||||||
|
20. Two patches to the CMake build system, by Alexander Barkov:
|
||||||
|
|
||||||
|
(1) Replace the "source" command by "." in CMakeLists.txt because
|
||||||
|
"source" is a bash-ism.
|
||||||
|
|
||||||
|
(2) Add missing HAVE_STDINT_H and HAVE_INTTYPES_H to config-cmake.h.in;
|
||||||
|
without these the CMake build does not work on Solaris.
|
||||||
|
|
||||||
|
21. Perl has changed its handling of \8 and \9. If there is no previously
|
||||||
|
encountered capturing group of those numbers, they are treated as the
|
||||||
|
literal characters 8 and 9 instead of a binary zero followed by the
|
||||||
|
literals. PCRE now does the same.
|
||||||
|
|
||||||
|
22. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||||
|
possible to specify values greater than 0777 and also making them
|
||||||
|
unambiguous.
|
||||||
|
|
||||||
|
23. Perl now gives an error for missing closing braces after \x{... instead of
|
||||||
|
treating the string as literal. PCRE now does the same.
|
||||||
|
|
||||||
|
24. RunTest used to grumble if an inappropriate test was selected explicitly,
|
||||||
|
but just skip it when running all tests. This made it awkward to run ranges
|
||||||
|
of tests when one of them was inappropriate. Now it just skips any
|
||||||
|
inappropriate tests, as it always did when running all tests.
|
||||||
|
|
||||||
|
25. If PCRE_AUTO_CALLOUT and PCRE_UCP were set for a pattern that contained
|
||||||
|
character types such as \d or \w, too many callouts were inserted, and the
|
||||||
|
data that they returned was rubbish.
|
||||||
|
|
||||||
|
26. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||||
|
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||||
|
were matched by \h. The code has now been refactored so that the lists of
|
||||||
|
the horizontal and vertical whitespace characters used for \h and \v (which
|
||||||
|
are defined only in one place) are now also used for \s.
|
||||||
|
|
||||||
|
27. Add JIT support for the 64 bit TileGX architecture.
|
||||||
|
Patch by Jiong Wang (Tilera Corporation).
|
||||||
|
|
||||||
|
28. Possessive quantifiers for classes (both explicit and automatically
|
||||||
|
generated) now use special opcodes instead of wrapping in ONCE brackets.
|
||||||
|
|
||||||
|
29. Whereas an item such as A{4}+ ignored the possessiveness of the quantifier
|
||||||
|
(because it's meaningless), this was not happening when PCRE_CASELESS was
|
||||||
|
set. Not wrong, but inefficient.
|
||||||
|
|
||||||
|
30. Updated perltest.pl to add /u (force Unicode mode) when /W (use Unicode
|
||||||
|
properties for \w, \d, etc) is present in a test regex. Otherwise if the
|
||||||
|
test contains no characters greater than 255, Perl doesn't realise it
|
||||||
|
should be using Unicode semantics.
|
||||||
|
|
||||||
|
31. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||||
|
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||||
|
does in Unicode mode.
|
||||||
|
|
||||||
|
32. Added the "forbid" facility to pcretest so that putting tests into the
|
||||||
|
wrong test files can sometimes be quickly detected.
|
||||||
|
|
||||||
|
33. There is now a limit (default 250) on the depth of nesting of parentheses.
|
||||||
|
This limit is imposed to control the amount of system stack used at compile
|
||||||
|
time. It can be changed at build time by --with-parens-nest-limit=xxx or
|
||||||
|
the equivalent in CMake.
|
||||||
|
|
||||||
|
34. Character classes such as [A-\d] or [a-[:digit:]] now cause compile-time
|
||||||
|
errors. Perl warns for these when in warning mode, but PCRE has no facility
|
||||||
|
for giving warnings.
|
||||||
|
|
||||||
|
35. Change 34 for 8.13 allowed quantifiers on assertions, because Perl does.
|
||||||
|
However, this was not working for (?!) because it is optimized to (*FAIL),
|
||||||
|
for which PCRE does not allow quantifiers. The optimization is now disabled
|
||||||
|
when a quantifier follows (?!). I can't see any use for this, but it makes
|
||||||
|
things uniform.
|
||||||
|
|
||||||
|
36. Perl no longer allows group names to start with digits, so I have made this
|
||||||
|
change also in PCRE. It simplifies the code a bit.
|
||||||
|
|
||||||
|
37. In extended mode, Perl ignores spaces before a + that indicates a
|
||||||
|
possessive quantifier. PCRE allowed a space before the quantifier, but not
|
||||||
|
before the possessive +. It now does.
|
||||||
|
|
||||||
|
38. The use of \K (reset reported match start) within a repeated possessive
|
||||||
|
group such as (a\Kb)*+ was not working.
|
||||||
|
|
||||||
|
40. Document that the same character tables must be used at compile time and
|
||||||
|
run time, and that the facility to pass tables to pcre_exec() and
|
||||||
|
pcre_dfa_exec() is for use only with saved/restored patterns.
|
||||||
|
|
||||||
|
41. Applied Jeff Trawick's patch CMakeLists.txt, which "provides two new
|
||||||
|
features for Builds with MSVC:
|
||||||
|
|
||||||
|
1. Support pcre.rc and/or pcreposix.rc (as is already done for MinGW
|
||||||
|
builds). The .rc files can be used to set FileDescription and many other
|
||||||
|
attributes.
|
||||||
|
|
||||||
|
2. Add an option (-DINSTALL_MSVC_PDB) to enable installation of .pdb files.
|
||||||
|
This allows higher-level build scripts which want .pdb files to avoid
|
||||||
|
hard-coding the exact files needed."
|
||||||
|
|
||||||
|
42. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||||
|
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||||
|
|
||||||
|
43. A minimizing repeat of a class containing codepoints greater than 255 in
|
||||||
|
non-UTF 16-bit or 32-bit modes caused an internal error when PCRE was
|
||||||
|
compiled to use the heap for recursion.
|
||||||
|
|
||||||
|
44. Got rid of some compiler warnings for unused variables when UTF but not UCP
|
||||||
|
is configured.
|
||||||
|
|
||||||
|
|
||||||
Version 8.33 28-May-2013
|
Version 8.33 28-May-2013
|
||||||
--------------------------
|
------------------------
|
||||||
|
|
||||||
1. Added 'U' to some constants that are compared to unsigned integers, to
|
1. Added 'U' to some constants that are compared to unsigned integers, to
|
||||||
avoid compiler signed/unsigned warnings. Added (int) casts to unsigned
|
avoid compiler signed/unsigned warnings. Added (int) casts to unsigned
|
||||||
|
269
pcre/HACKING
269
pcre/HACKING
@ -54,12 +54,12 @@ Support for 16-bit and 32-bit data strings
|
|||||||
|
|
||||||
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
||||||
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
||||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating different
|
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
|
||||||
libraries. In the description that follows, the word "short" is
|
different libraries. In the description that follows, the word "short" is used
|
||||||
used for a 16-bit data quantity, and the word "unit" is used for a quantity
|
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
|
||||||
that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
|
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
|
||||||
integer in 32-bit mode. However, so as not to over-complicate the text, the
|
However, so as not to over-complicate the text, the names of PCRE functions are
|
||||||
names of PCRE functions are given in 8-bit form only.
|
given in 8-bit form only.
|
||||||
|
|
||||||
|
|
||||||
Computing the memory requirement: how it was
|
Computing the memory requirement: how it was
|
||||||
@ -94,6 +94,11 @@ runs more slowly than before (30% or more, depending on the pattern) because it
|
|||||||
is doing a full analysis of the pattern. My hope was that this would not be a
|
is doing a full analysis of the pattern. My hope was that this would not be a
|
||||||
big issue, and in the event, nobody has commented on it.
|
big issue, and in the event, nobody has commented on it.
|
||||||
|
|
||||||
|
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||||
|
(default 250, settable at build time) so as to put a limit on the amount of
|
||||||
|
system stack used by pcre_compile(). This is a safety feature for environments
|
||||||
|
with small stacks where the patterns are provided by users.
|
||||||
|
|
||||||
|
|
||||||
Traditional matching function
|
Traditional matching function
|
||||||
-----------------------------
|
-----------------------------
|
||||||
@ -120,29 +125,30 @@ facilities are available, and those that are do not always work in quite the
|
|||||||
same way. See the user documentation for details.
|
same way. See the user documentation for details.
|
||||||
|
|
||||||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
||||||
because it may have a number of states active at one time. More work would be
|
because it may have a number of states active at one time. More work would be
|
||||||
needed at compile time to produce a traditional FSM where only one state is
|
needed at compile time to produce a traditional FSM where only one state is
|
||||||
ever active at once. I believe some other regex matchers work this way.
|
ever active at once. I believe some other regex matchers work this way. JIT
|
||||||
|
support is not available for this kind of matching.
|
||||||
|
|
||||||
|
|
||||||
Changeable options
|
Changeable options
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
|
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
|
||||||
change in the middle of patterns. From PCRE 8.13, their processing is handled
|
others) may change in the middle of patterns. From PCRE 8.13, their processing
|
||||||
entirely at compile time by generating different opcodes for the different
|
is handled entirely at compile time by generating different opcodes for the
|
||||||
settings. The runtime functions do not need to keep track of an options state
|
different settings. The runtime functions do not need to keep track of an
|
||||||
any more.
|
options state any more.
|
||||||
|
|
||||||
|
|
||||||
Format of compiled patterns
|
Format of compiled patterns
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
|
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
|
||||||
shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
|
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
|
||||||
items of variable length. The first unit in an item contains an opcode, and
|
variable length. The first unit in an item contains an opcode, and the length
|
||||||
the length of the item is either implicit in the opcode or contained in the
|
of the item is either implicit in the opcode or contained in the data that
|
||||||
data that follows it.
|
follows it.
|
||||||
|
|
||||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||||
@ -151,8 +157,10 @@ default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
|
|||||||
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
|
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
|
||||||
larger than 2 is necessary only when patterns whose compiled length is greater
|
larger than 2 is necessary only when patterns whose compiled length is greater
|
||||||
than 64K are going to be processed. In this description, we assume the "normal"
|
than 64K are going to be processed. In this description, we assume the "normal"
|
||||||
compilation options. Data values that are counts (e.g. for quantifiers) are
|
compilation options. Data values that are counts (e.g. quantifiers) are two
|
||||||
always just two bytes long (one short in 16-bit mode).
|
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
|
||||||
|
and 32-bit modes.
|
||||||
|
|
||||||
|
|
||||||
Opcodes with no following data
|
Opcodes with no following data
|
||||||
------------------------------
|
------------------------------
|
||||||
@ -162,7 +170,7 @@ These items are all just one unit long
|
|||||||
OP_END end of pattern
|
OP_END end of pattern
|
||||||
OP_ANY match any one character other than newline
|
OP_ANY match any one character other than newline
|
||||||
OP_ALLANY match any one character, including newline
|
OP_ALLANY match any one character, including newline
|
||||||
OP_ANYBYTE match any single byte, even in UTF-8 mode
|
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
|
||||||
OP_SOD match start of data: \A
|
OP_SOD match start of data: \A
|
||||||
OP_SOM, start of match (subject + offset): \G
|
OP_SOM, start of match (subject + offset): \G
|
||||||
OP_SET_SOM, set start of match (\K)
|
OP_SET_SOM, set start of match (\K)
|
||||||
@ -180,28 +188,33 @@ These items are all just one unit long
|
|||||||
OP_VSPACE \v
|
OP_VSPACE \v
|
||||||
OP_NOT_WORDCHAR \W
|
OP_NOT_WORDCHAR \W
|
||||||
OP_WORDCHAR \w
|
OP_WORDCHAR \w
|
||||||
OP_EODN match end of data or \n at end: \Z
|
OP_EODN match end of data or newline at end: \Z
|
||||||
OP_EOD match end of data: \z
|
OP_EOD match end of data: \z
|
||||||
OP_DOLL $ (end of data, or before final newline)
|
OP_DOLL $ (end of data, or before final newline)
|
||||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||||
OP_EXTUNI match an extended Unicode character
|
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||||
OP_ANYNL match any Unicode newline sequence
|
OP_ANYNL match any Unicode newline sequence
|
||||||
|
|
||||||
|
OP_ASSERT_ACCEPT )
|
||||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||||
OP_PRUNE ) OP_CLOSE, followed by a 2-byte number,
|
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||||
OP_SKIP ) indicating which parentheses must be closed.
|
OP_SKIP ) indicates which parentheses must be closed.
|
||||||
|
OP_THEN )
|
||||||
|
|
||||||
|
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||||
|
This ends the assertion, not the entire pattern match.
|
||||||
|
|
||||||
|
|
||||||
Backtracking control verbs with (optional) data
|
Backtracking control verbs with optional data
|
||||||
-----------------------------------------------
|
---------------------------------------------
|
||||||
|
|
||||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||||
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
||||||
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
||||||
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
||||||
following in the same format.
|
following in the same format as OP_MARK.
|
||||||
|
|
||||||
|
|
||||||
Matching literal characters
|
Matching literal characters
|
||||||
@ -212,6 +225,10 @@ casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
|||||||
the character may be more than one unit long. In UTF-32 mode, characters
|
the character may be more than one unit long. In UTF-32 mode, characters
|
||||||
are always exactly one unit long.
|
are always exactly one unit long.
|
||||||
|
|
||||||
|
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||||
|
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||||
|
for something like [^a]).
|
||||||
|
|
||||||
|
|
||||||
Repeating single characters
|
Repeating single characters
|
||||||
---------------------------
|
---------------------------
|
||||||
@ -232,10 +249,9 @@ following opcodes, which come in caseful and caseless versions:
|
|||||||
|
|
||||||
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
||||||
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
||||||
UTF-32 mode these are one-unit items.
|
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
|
||||||
Those with "MIN" in their names are the minimizing versions. Those with "POS"
|
minimizing versions. Those with "POS" in their names are possessive versions.
|
||||||
in their names are possessive versions. Other repeats make use of these
|
Other repeats make use of these opcodes:
|
||||||
opcodes:
|
|
||||||
|
|
||||||
Caseful Caseless
|
Caseful Caseless
|
||||||
OP_UPTO OP_UPTOI
|
OP_UPTO OP_UPTOI
|
||||||
@ -243,10 +259,15 @@ opcodes:
|
|||||||
OP_POSUPTO OP_POSUPTOI
|
OP_POSUPTO OP_POSUPTOI
|
||||||
OP_EXACT OP_EXACTI
|
OP_EXACT OP_EXACTI
|
||||||
|
|
||||||
Each of these is followed by a two-byte (one short) count (most significant
|
Each of these is followed by a count and then the repeated character. OP_UPTO
|
||||||
byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
|
matches from 0 to the given number. A repeat with a non-zero minimum and a
|
||||||
0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
|
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
|
||||||
coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OP_POSUPTO).
|
OP_POSUPTO).
|
||||||
|
|
||||||
|
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||||
|
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||||
|
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||||
|
positive single-character classes.
|
||||||
|
|
||||||
|
|
||||||
Repeating character types
|
Repeating character types
|
||||||
@ -277,7 +298,10 @@ Match by Unicode property
|
|||||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||||
character by testing its Unicode property (the \p and \P escape sequences).
|
character by testing its Unicode property (the \p and \P escape sequences).
|
||||||
Each is followed by two units that encode the desired property as a type and a
|
Each is followed by two units that encode the desired property as a type and a
|
||||||
value.
|
value. The types are a set of #defines of the form PT_xxx, and the values are
|
||||||
|
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
|
||||||
|
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
|
||||||
|
PT_SC (Script).
|
||||||
|
|
||||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||||
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||||
@ -287,67 +311,88 @@ value.
|
|||||||
Character classes
|
Character classes
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
|
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||||
something like [^a]).
|
something like [^a]).
|
||||||
|
|
||||||
Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
|
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||||
repeated, negated, single-character classes. The normal single-character
|
negated, single-character classes. The normal single-character opcodes
|
||||||
opcodes (OP_STAR, etc.) are used for repeated positive single-character
|
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||||
classes.
|
|
||||||
|
|
||||||
When there is more than one character in a class and all the characters are
|
When there is more than one character in a class, and all the code points are
|
||||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||||
negative one. In either case, the opcode is followed by a 32-byte (16-short)
|
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||||
bit map containing a 1 bit for every character that is acceptable. The bits are
|
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||||
counted from the least significant end of each unit. In caseless mode, bits for
|
bits are counted from the least significant end of each unit. In caseless mode,
|
||||||
both cases are set.
|
bits for both cases are set.
|
||||||
|
|
||||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
|
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
|
||||||
subject characters with values greater than 255 can be handled correctly. For
|
mode, subject characters with values greater than 255 can be handled correctly.
|
||||||
OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||||
|
|
||||||
For classes containing characters with values greater than 255, OP_XCLASS is
|
For classes containing characters with values greater than 255 or that contain
|
||||||
used. It optionally uses a bit map (if any characters lie within it), followed
|
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
|
||||||
by a list of pairs (for a range) and single characters. In caseless mode, both
|
are less than 256, followed by a list of pairs (for a range) and single
|
||||||
cases are explicitly listed. There is a flag character than indicates whether
|
characters. In caseless mode, both cases are explicitly listed.
|
||||||
it is a positive or a negative class.
|
|
||||||
|
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
|
||||||
|
this is a negative class, and XCL_MAP indicates that a bit map is present.
|
||||||
|
There follows the bit map, if XCL_MAP is set, and then a sequence of items
|
||||||
|
coded as follows:
|
||||||
|
|
||||||
|
XCL_END marks the end of the list
|
||||||
|
XCL_SINGLE one character follows
|
||||||
|
XCL_RANGE two characters follow
|
||||||
|
XCL_PROP a Unicode property (type, value) follows
|
||||||
|
XCL_NOTPROP a Unicode property (type, value) follows
|
||||||
|
|
||||||
|
If a range starts with a code point less than 256 and ends with one greater
|
||||||
|
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
|
||||||
|
This means that if no other items in the class set bits in the map, a map is
|
||||||
|
not needed.
|
||||||
|
|
||||||
|
|
||||||
Back references
|
Back references
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
|
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||||
containing the reference number.
|
reference number if the reference is to a unique capturing group (either by
|
||||||
|
number or by name). When named groups are used, there may be more than one
|
||||||
|
group with the same name. In this case, a reference by name generates OP_DNREF
|
||||||
|
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
|
||||||
|
in the group name table of the first entry for the required name, followed by
|
||||||
|
the number of groups with the same name.
|
||||||
|
|
||||||
|
|
||||||
Repeating character classes and back references
|
Repeating character classes and back references
|
||||||
-----------------------------------------------
|
-----------------------------------------------
|
||||||
|
|
||||||
Single-character classes are handled specially (see above). This section
|
Single-character classes are handled specially (see above). This section
|
||||||
applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
|
applies to other classes and also to back references. In both cases, the repeat
|
||||||
follows the base item. The matching code looks at the following opcode to see
|
information follows the base item. The matching code looks at the following
|
||||||
if it is one of
|
opcode to see if it is one of
|
||||||
|
|
||||||
OP_CRSTAR
|
OP_CRSTAR
|
||||||
OP_CRMINSTAR
|
OP_CRMINSTAR
|
||||||
|
OP_CRPOSSTAR
|
||||||
OP_CRPLUS
|
OP_CRPLUS
|
||||||
OP_CRMINPLUS
|
OP_CRMINPLUS
|
||||||
|
OP_CRPOSPLUS
|
||||||
OP_CRQUERY
|
OP_CRQUERY
|
||||||
OP_CRMINQUERY
|
OP_CRMINQUERY
|
||||||
|
OP_CRPOSQUERY
|
||||||
OP_CRRANGE
|
OP_CRRANGE
|
||||||
OP_CRMINRANGE
|
OP_CRMINRANGE
|
||||||
|
OP_CRPOSRANGE
|
||||||
|
|
||||||
All but the last two are just single-unit items. The others are followed by
|
All but the last three are single-unit items, with no data. The others are
|
||||||
four bytes (two shorts) of data, comprising the minimum and maximum repeat
|
followed by the minimum and maximum repeat counts.
|
||||||
counts. There are no special possessive opcodes for these repeats; a possessive
|
|
||||||
repeat is compiled into an atomic group.
|
|
||||||
|
|
||||||
|
|
||||||
Brackets and alternation
|
Brackets and alternation
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
A pair of non-capturing (round) brackets is wrapped round each expression at
|
A pair of non-capturing round brackets is wrapped round each expression at
|
||||||
compile time, so alternation always happens in the context of brackets.
|
compile time, so alternation always happens in the context of brackets.
|
||||||
|
|
||||||
[Note for North Americans: "bracket" to some English speakers, including
|
[Note for North Americans: "bracket" to some English speakers, including
|
||||||
@ -364,20 +409,20 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
|||||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||||
number immediately follows the offset, always as a 2-byte (one short) item.
|
number is a count that immediately follows the offset.
|
||||||
|
|
||||||
OP_KET is used for subpatterns that do not repeat indefinitely, and
|
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||||
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||||
maximally respectively (see below for possessive repetitions). All three are
|
respectively (see below for possessive repetitions). All three are followed by
|
||||||
followed by LINK_SIZE bytes giving (as a positive number) the offset back to
|
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
|
||||||
the matching bracket opcode.
|
bracket opcode.
|
||||||
|
|
||||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||||
single-unit opcodes that tell the matcher that skipping the following
|
single-unit opcodes that tell the matcher that skipping the following
|
||||||
subpattern entirely is a valid branch. In the case of the first two, not
|
subpattern entirely is a valid branch. In the case of the first two, not
|
||||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||||
because it may be called as a subroutine from elsewhere in the regex.
|
because it may be called as a subroutine from elsewhere in the regex.
|
||||||
|
|
||||||
A subpattern with an indefinite maximum repetition is replicated in the
|
A subpattern with an indefinite maximum repetition is replicated in the
|
||||||
@ -397,6 +442,7 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
|||||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||||
OP_KETRMAX, and if so, to break the loop.
|
OP_KETRMAX, and if so, to break the loop.
|
||||||
|
|
||||||
|
|
||||||
Possessive brackets
|
Possessive brackets
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
@ -407,26 +453,34 @@ of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
|||||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||||
|
|
||||||
|
|
||||||
|
Once-only (atomic) groups
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
These are just like other subpatterns, but they start with the opcode
|
||||||
|
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
|
||||||
|
within the atomic group; the latter when there are. The distinction is needed
|
||||||
|
for when there is a backtrack to before the group - any captures within the
|
||||||
|
group must be reset, so it is necessary to retain backtracking points inside
|
||||||
|
the group even after it is complete in order to do this. When there are no
|
||||||
|
captures in an atomic group, all the backtracking can be discarded when it is
|
||||||
|
complete. This is more efficient, and also uses less stack.
|
||||||
|
|
||||||
|
The check for matching an empty string in an unbounded repeat is handled
|
||||||
|
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||||
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
----------
|
----------
|
||||||
|
|
||||||
Forward assertions are just like other subpatterns, but starting with one of
|
Forward assertions are also just like other subpatterns, but starting with one
|
||||||
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||||
is OP_REVERSE, followed by a two byte (one short) count of the number of
|
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||||
characters to move back the pointer in the subject string. In ASCII mode, the
|
pointer in the subject string. In ASCII mode, the count is a number of units,
|
||||||
count is a number of units, but in UTF-8/16 mode each character may occupy more
|
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
|
||||||
than one unit; in UTF-32 mode each character occupies exactly one unit.
|
mode each character occupies exactly one unit. A separate count is present in
|
||||||
A separate count is present in each alternative of a lookbehind
|
each alternative of a lookbehind assertion, allowing them to have different
|
||||||
assertion, allowing them to have different fixed lengths.
|
fixed lengths.
|
||||||
|
|
||||||
|
|
||||||
Once-only (atomic) subpatterns
|
|
||||||
------------------------------
|
|
||||||
|
|
||||||
These are also just like other subpatterns, but they start with the opcode
|
|
||||||
OP_ONCE. The check for matching an empty string in an unbounded repeat is
|
|
||||||
handled entirely at runtime, so there is just this one opcode.
|
|
||||||
|
|
||||||
|
|
||||||
Conditional subpatterns
|
Conditional subpatterns
|
||||||
@ -435,28 +489,29 @@ Conditional subpatterns
|
|||||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||||
the condition is a back reference, this is stored at the start of the
|
the condition is a back reference, this is stored at the start of the
|
||||||
subpattern using the opcode OP_CREF followed by two bytes (one short)
|
subpattern using the opcode OP_CREF followed by a count containing the
|
||||||
containing the reference number. OP_NCREF is used instead if the reference was
|
reference number, provided that the reference is to a unique capturing group.
|
||||||
generated by name (so that the runtime code knows to check for duplicate
|
If the reference was by name and there is more than one group with that name,
|
||||||
names).
|
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||||
|
names table, and the number of groups with the same name.
|
||||||
|
|
||||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||||
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
|
subpattern using the opcode OP_RREF (with a value of zero for "the whole
|
||||||
zero for "the whole pattern". For a DEFINE condition, just the single unit
|
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
|
||||||
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
|
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
|
||||||
always starts with one of the assertions.
|
conditional subpattern always starts with one of the assertions.
|
||||||
|
|
||||||
|
|
||||||
Recursion
|
Recursion
|
||||||
---------
|
---------
|
||||||
|
|
||||||
Recursion either matches the current regex, or some subexpression. The opcode
|
Recursion either matches the current regex, or some subexpression. The opcode
|
||||||
OP_RECURSE is followed by an value which is the offset to the starting bracket
|
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
|
||||||
from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||||
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns
|
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
|
||||||
broke it). OP_RECURSE is also used for "subroutine" calls, even though they
|
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
|
||||||
are not strictly a recursion.
|
not strictly a recursion.
|
||||||
|
|
||||||
|
|
||||||
Callout
|
Callout
|
||||||
@ -464,10 +519,10 @@ Callout
|
|||||||
|
|
||||||
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
||||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||||
cases there follows a two-byte (one short) value giving the offset in the
|
cases there follows a count giving the offset in the pattern string to the
|
||||||
pattern to the start of the following item, and another two-byte (one short)
|
start of the following item, and another count giving the length of this item.
|
||||||
item giving the length of the next item.
|
These values make it possible for pcretest to output useful tracing information
|
||||||
|
using automatic callouts.
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
February 2012
|
November 2013
|
||||||
|
47
pcre/NEWS
47
pcre/NEWS
@ -1,6 +1,53 @@
|
|||||||
News about PCRE releases
|
News about PCRE releases
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
|
Release 8.34 15-December-2013
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
As well as fixing the inevitable bugs, performance has been improved by
|
||||||
|
refactoring and extending the amount of "auto-possessification" that PCRE does.
|
||||||
|
Other notable changes:
|
||||||
|
|
||||||
|
. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||||
|
an empty string. If it can, pcretest shows this in its information output.
|
||||||
|
|
||||||
|
. A back reference to a named subpattern when there is more than one of the
|
||||||
|
same name now checks them in the order in which they appear in the pattern.
|
||||||
|
The first one that is set is used for the reference. Previously only the
|
||||||
|
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||||
|
|
||||||
|
. Unicode character properties were updated from Unicode 6.3.0.
|
||||||
|
|
||||||
|
. The character VT has been added to the set of characters that match \s and
|
||||||
|
are generally treated as white space, following this same change in Perl
|
||||||
|
5.18. There is now no difference between "Perl space" and "POSIX space".
|
||||||
|
|
||||||
|
. Perl has changed its handling of \8 and \9. If there is no previously
|
||||||
|
encountered capturing group of those numbers, they are treated as the
|
||||||
|
literal characters 8 and 9 instead of a binary zero followed by the
|
||||||
|
literals. PCRE now does the same.
|
||||||
|
|
||||||
|
. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||||
|
possible to specify values greater than 0777 and also making them
|
||||||
|
unambiguous.
|
||||||
|
|
||||||
|
. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||||
|
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||||
|
were matched by \h.
|
||||||
|
|
||||||
|
. Add JIT support for the 64 bit TileGX architecture.
|
||||||
|
|
||||||
|
. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||||
|
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||||
|
does in Unicode mode.
|
||||||
|
|
||||||
|
. Perl no longer allows group names to start with digits, so I have made this
|
||||||
|
change also in PCRE.
|
||||||
|
|
||||||
|
. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||||
|
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||||
|
|
||||||
|
|
||||||
Release 8.33 28-May-2013
|
Release 8.33 28-May-2013
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
|
@ -171,8 +171,8 @@ can skip ahead to the CMake section.
|
|||||||
pcre16_version.c
|
pcre16_version.c
|
||||||
pcre16_xclass.c
|
pcre16_xclass.c
|
||||||
|
|
||||||
(8) If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||||
or 32-bit libraries) repeat steps 5-6 with the following files:
|
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||||
|
|
||||||
pcre32_byte_order.c
|
pcre32_byte_order.c
|
||||||
pcre32_chartables.c
|
pcre32_chartables.c
|
||||||
|
28
pcre/README
28
pcre/README
@ -9,8 +9,10 @@ from:
|
|||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||||
|
|
||||||
There is a mailing list for discussion about the development of PCRE at
|
There is a mailing list for discussion about the development of PCRE at
|
||||||
|
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||||
|
subscription here:
|
||||||
|
|
||||||
pcre-dev@exim.org
|
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||||
|
|
||||||
Please read the NEWS file if you are upgrading from a previous release.
|
Please read the NEWS file if you are upgrading from a previous release.
|
||||||
The contents of this README file are:
|
The contents of this README file are:
|
||||||
@ -112,6 +114,11 @@ contributions provided support for compiling PCRE on various flavours of
|
|||||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||||
in the standard distribution, so these contributions have been archived.
|
in the standard distribution, so these contributions have been archived.
|
||||||
|
|
||||||
|
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||||
|
pcretest programs here:
|
||||||
|
|
||||||
|
http://www.rexegg.com/pcregrep-pcretest.html
|
||||||
|
|
||||||
|
|
||||||
Building PCRE on non-Unix-like systems
|
Building PCRE on non-Unix-like systems
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
@ -262,9 +269,17 @@ library. They are also documented in the pcrebuild man page.
|
|||||||
|
|
||||||
on the "configure" command.
|
on the "configure" command.
|
||||||
|
|
||||||
. PCRE has a counter that can be set to limit the amount of resources it uses.
|
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||||
If the limit is exceeded during a match, the match fails. The default is ten
|
pattern. This limits the amount of system stack that a pattern uses when it
|
||||||
million. You can change the default by setting, for example,
|
is compiled. The default is 250, but you can change it by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
|
--with-parens-nest-limit=500
|
||||||
|
|
||||||
|
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||||
|
when matching a pattern. If the limit is exceeded during a match, the match
|
||||||
|
fails. The default is ten million. You can change the default by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
--with-match-limit=500000
|
--with-match-limit=500000
|
||||||
|
|
||||||
@ -344,7 +359,8 @@ library. They are also documented in the pcrebuild man page.
|
|||||||
report is generated by running "make coverage". If ccache is installed on
|
report is generated by running "make coverage". If ccache is installed on
|
||||||
your system, it must be disabled when building PCRE for coverage reporting.
|
your system, it must be disabled when building PCRE for coverage reporting.
|
||||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||||
running "make" to build PCRE.
|
running "make" to build PCRE. There is more information about coverage
|
||||||
|
reporting in the "pcrebuild" documentation.
|
||||||
|
|
||||||
. The pcregrep program currently supports only 8-bit data files, and so
|
. The pcregrep program currently supports only 8-bit data files, and so
|
||||||
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||||
@ -971,4 +987,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
|||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 28 April 2013
|
Last updated: 05 November 2013
|
||||||
|
@ -502,6 +502,10 @@ echo "---------------------------- Test 105 -----------------------------" >>tes
|
|||||||
(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtry 2>&1
|
(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtry 2>&1
|
||||||
echo "RC=$?" >>testtry
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
echo "---------------------------- Test 106 -----------------------------" >>testtry
|
||||||
|
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtry 2>&1
|
||||||
|
echo "RC=$?" >>testtry
|
||||||
|
|
||||||
|
|
||||||
# Now compare the results.
|
# Now compare the results.
|
||||||
|
|
||||||
|
99
pcre/RunTest
99
pcre/RunTest
@ -14,11 +14,11 @@
|
|||||||
# UTF-8 with the UTF-8 check turned off; for this, studying must also be
|
# UTF-8 with the UTF-8 check turned off; for this, studying must also be
|
||||||
# disabled with /SS.
|
# disabled with /SS.
|
||||||
#
|
#
|
||||||
# When JIT support is available, all the tests are also run with -s+ to test
|
# When JIT support is available, all appropriate tests are also run with -s+ to
|
||||||
# (again, almost) everything with studying and the JIT option, unless "nojit"
|
# test (again, almost) everything with studying and the JIT option, unless
|
||||||
# is given on the command line. There are also two tests for JIT-specific
|
# "nojit" is given on the command line. There are also two tests for
|
||||||
# features, one to be run when JIT support is available (unless "nojit" is
|
# JIT-specific features, one to be run when JIT support is available (unless
|
||||||
# specified), and one when it is not.
|
# "nojit" is specified), and one when it is not.
|
||||||
#
|
#
|
||||||
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
||||||
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
||||||
@ -30,9 +30,13 @@
|
|||||||
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
||||||
# except test 10. Whatever order the arguments are in, the tests are always run
|
# except test 10. Whatever order the arguments are in, the tests are always run
|
||||||
# in numerical order.
|
# in numerical order.
|
||||||
|
#
|
||||||
|
# Inappropriate tests are automatically skipped (with a comment to say so): for
|
||||||
|
# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
|
||||||
|
# support is compiled, test 13 is skipped.
|
||||||
|
#
|
||||||
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
|
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
|
||||||
# followed by an argument to run cross- compiled executables under a simulator,
|
# followed by an argument to run cross-compiled executables under a simulator,
|
||||||
# for example:
|
# for example:
|
||||||
#
|
#
|
||||||
# RunTest 3 sim "qemu-arm -s 8388608"
|
# RunTest 3 sim "qemu-arm -s 8388608"
|
||||||
@ -62,8 +66,8 @@ title8="Test 8: DFA matching main functionality"
|
|||||||
title9="Test 9: DFA matching with UTF"
|
title9="Test 9: DFA matching with UTF"
|
||||||
title10="Test 10: DFA matching with Unicode properties"
|
title10="Test 10: DFA matching with Unicode properties"
|
||||||
title11="Test 11: Internal offsets and code size tests"
|
title11="Test 11: Internal offsets and code size tests"
|
||||||
title12="Test 12: JIT-specific features (JIT available)"
|
title12="Test 12: JIT-specific features (when JIT is available)"
|
||||||
title13="Test 13: JIT-specific features (JIT not available)"
|
title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||||
title14="Test 14: Specials for the basic 8-bit library"
|
title14="Test 14: Specials for the basic 8-bit library"
|
||||||
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
||||||
title16="Test 16: Specials for the 8-bit library with Unicode propery support"
|
title16="Test 16: Specials for the 8-bit library with Unicode propery support"
|
||||||
@ -350,79 +354,6 @@ if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
|
|||||||
jitopt=-s+
|
jitopt=-s+
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle any explicit skips
|
|
||||||
|
|
||||||
for i in $skip; do eval do$i=no; done
|
|
||||||
|
|
||||||
# If any unsuitable tests were explicitly requested, grumble.
|
|
||||||
|
|
||||||
if [ $utf -eq 0 ] ; then
|
|
||||||
if [ $do4 = yes ] ; then
|
|
||||||
echo "Can't run test 4 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do5 = yes ] ; then
|
|
||||||
echo "Can't run test 5 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do9 = yes ] ; then
|
|
||||||
echo "Can't run test 8 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do15 = yes ] ; then
|
|
||||||
echo "Can't run test 15 because UTF support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do18 = yes ] ; then
|
|
||||||
echo "Can't run test 18 because UTF support is not configured"
|
|
||||||
fi
|
|
||||||
if [ $do22 = yes ] ; then
|
|
||||||
echo "Can't run test 22 because UTF support is not configured"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $ucp -eq 0 ] ; then
|
|
||||||
if [ $do6 = yes ] ; then
|
|
||||||
echo "Can't run test 6 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do7 = yes ] ; then
|
|
||||||
echo "Can't run test 7 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do10 = yes ] ; then
|
|
||||||
echo "Can't run test 10 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do16 = yes ] ; then
|
|
||||||
echo "Can't run test 16 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
if [ $do19 = yes ] ; then
|
|
||||||
echo "Can't run test 19 because Unicode property support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $link_size -ne 2 ] ; then
|
|
||||||
if [ $do11 = yes ] ; then
|
|
||||||
echo "Can't run test 11 because the link size ($link_size) is not 2"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $jit -eq 0 ] ; then
|
|
||||||
if [ $do12 = "yes" ] ; then
|
|
||||||
echo "Can't run test 12 because JIT support is not configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
if [ $do13 = "yes" ] ; then
|
|
||||||
echo "Can't run test 13 because JIT support is configured"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If no specific tests were requested, select all. Those that are not
|
# If no specific tests were requested, select all. Those that are not
|
||||||
# relevant will be automatically skipped.
|
# relevant will be automatically skipped.
|
||||||
|
|
||||||
@ -461,8 +392,8 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
|||||||
do26=yes
|
do26=yes
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle any explicit skips (again, so that an argument list may consist only
|
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||||
# of explicit skips).
|
# only of explicit skips.
|
||||||
|
|
||||||
for i in $skip; do eval do$i=no; done
|
for i in $skip; do eval do$i=no; done
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
#cmakedefine HAVE_SYS_TYPES_H 1
|
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||||
#cmakedefine HAVE_UNISTD_H 1
|
#cmakedefine HAVE_UNISTD_H 1
|
||||||
#cmakedefine HAVE_WINDOWS_H 1
|
#cmakedefine HAVE_WINDOWS_H 1
|
||||||
#cmakedefine HAVE_STDINT_H 1
|
#cmakedefine HAVE_STDINT_H 1
|
||||||
#cmakedefine HAVE_INTTYPES_H 1
|
#cmakedefine HAVE_INTTYPES_H 1
|
||||||
|
|
||||||
#cmakedefine HAVE_TYPE_TRAITS_H 1
|
#cmakedefine HAVE_TYPE_TRAITS_H 1
|
||||||
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
|
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
|
||||||
@ -46,6 +46,7 @@
|
|||||||
#define NEWLINE @NEWLINE@
|
#define NEWLINE @NEWLINE@
|
||||||
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
||||||
#define LINK_SIZE @PCRE_LINK_SIZE@
|
#define LINK_SIZE @PCRE_LINK_SIZE@
|
||||||
|
#define PARENS_NEST_LIMIT @PCRE_PARENS_NEST_LIMIT@
|
||||||
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
||||||
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
||||||
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
|
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
|
||||||
|
@ -9,17 +9,17 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
|||||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
|
|
||||||
m4_define(pcre_major, [8])
|
m4_define(pcre_major, [8])
|
||||||
m4_define(pcre_minor, [33])
|
m4_define(pcre_minor, [34])
|
||||||
m4_define(pcre_prerelease, [])
|
m4_define(pcre_prerelease, [])
|
||||||
m4_define(pcre_date, [2013-05-28])
|
m4_define(pcre_date, [2013-12-15])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
||||||
# Libtool shared library interface versions (current:revision:age)
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
m4_define(libpcre_version, [3:1:2])
|
m4_define(libpcre_version, [3:2:2])
|
||||||
m4_define(libpcre16_version, [2:1:2])
|
m4_define(libpcre16_version, [2:2:2])
|
||||||
m4_define(libpcre32_version, [0:1:0])
|
m4_define(libpcre32_version, [0:2:0])
|
||||||
m4_define(libpcreposix_version, [0:2:0])
|
m4_define(libpcreposix_version, [0:2:0])
|
||||||
m4_define(libpcrecpp_version, [0:0:0])
|
m4_define(libpcrecpp_version, [0:0:0])
|
||||||
|
|
||||||
@ -275,6 +275,12 @@ AC_ARG_WITH(link-size,
|
|||||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||||
, with_link_size=2)
|
, with_link_size=2)
|
||||||
|
|
||||||
|
# Handle --with-parens-nest-limit=N
|
||||||
|
AC_ARG_WITH(parens-nest-limit,
|
||||||
|
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||||
|
[nested parentheses limit (default=250)]),
|
||||||
|
, with_parens_nest_limit=250)
|
||||||
|
|
||||||
# Handle --with-match-limit=N
|
# Handle --with-match-limit=N
|
||||||
AC_ARG_WITH(match-limit,
|
AC_ARG_WITH(match-limit,
|
||||||
AS_HELP_STRING([--with-match-limit=N],
|
AS_HELP_STRING([--with-match-limit=N],
|
||||||
@ -784,6 +790,11 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
|||||||
faster than using malloc() for each call. The threshold above which
|
faster than using malloc() for each call. The threshold above which
|
||||||
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
|
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||||
|
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern.])
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||||
The value of MATCH_LIMIT determines the default number of times the
|
The value of MATCH_LIMIT determines the default number of times the
|
||||||
internal match() function can be called during a single execution of
|
internal match() function can be called during a single execution of
|
||||||
@ -957,7 +968,7 @@ if test "$enable_pcretest_libreadline" = "yes"; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check for valgrind
|
# Handle valgrind support
|
||||||
|
|
||||||
if test "$enable_valgrind" = "yes"; then
|
if test "$enable_valgrind" = "yes"; then
|
||||||
m4_ifdef([PKG_CHECK_MODULES],
|
m4_ifdef([PKG_CHECK_MODULES],
|
||||||
@ -965,7 +976,7 @@ if test "$enable_valgrind" = "yes"; then
|
|||||||
[AC_MSG_ERROR([pkg-config not supported])])
|
[AC_MSG_ERROR([pkg-config not supported])])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# test code coverage reporting
|
# Handle code coverage reporting support
|
||||||
if test "$enable_coverage" = "yes"; then
|
if test "$enable_coverage" = "yes"; then
|
||||||
if test "x$GCC" != "xyes"; then
|
if test "x$GCC" != "xyes"; then
|
||||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||||
@ -996,11 +1007,7 @@ if test "$enable_coverage" = "yes"; then
|
|||||||
AC_MSG_ERROR([genhtml not found])
|
AC_MSG_ERROR([genhtml not found])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE([SUPPORT_GCOV],[1], [
|
# Set flags needed for gcov
|
||||||
Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
|
||||||
are able to generate code coverage reports.])
|
|
||||||
|
|
||||||
# And add flags needed for gcov
|
|
||||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||||
GCOV_LIBS="-lgcov"
|
GCOV_LIBS="-lgcov"
|
||||||
@ -1075,6 +1082,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||||
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
||||||
Internal link size .............. : ${with_link_size}
|
Internal link size .............. : ${with_link_size}
|
||||||
|
Nested parentheses limit ........ : ${with_parens_nest_limit}
|
||||||
Match limit ..................... : ${with_match_limit}
|
Match limit ..................... : ${with_match_limit}
|
||||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||||
Build shared libs ............... : ${enable_shared}
|
Build shared libs ............... : ${enable_shared}
|
||||||
|
@ -171,8 +171,8 @@ can skip ahead to the CMake section.
|
|||||||
pcre16_version.c
|
pcre16_version.c
|
||||||
pcre16_xclass.c
|
pcre16_xclass.c
|
||||||
|
|
||||||
(8) If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||||
or 32-bit libraries) repeat steps 5-6 with the following files:
|
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||||
|
|
||||||
pcre32_byte_order.c
|
pcre32_byte_order.c
|
||||||
pcre32_chartables.c
|
pcre32_chartables.c
|
||||||
|
@ -9,8 +9,10 @@ from:
|
|||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||||
|
|
||||||
There is a mailing list for discussion about the development of PCRE at
|
There is a mailing list for discussion about the development of PCRE at
|
||||||
|
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||||
|
subscription here:
|
||||||
|
|
||||||
pcre-dev@exim.org
|
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||||
|
|
||||||
Please read the NEWS file if you are upgrading from a previous release.
|
Please read the NEWS file if you are upgrading from a previous release.
|
||||||
The contents of this README file are:
|
The contents of this README file are:
|
||||||
@ -112,6 +114,11 @@ contributions provided support for compiling PCRE on various flavours of
|
|||||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||||
in the standard distribution, so these contributions have been archived.
|
in the standard distribution, so these contributions have been archived.
|
||||||
|
|
||||||
|
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||||
|
pcretest programs here:
|
||||||
|
|
||||||
|
http://www.rexegg.com/pcregrep-pcretest.html
|
||||||
|
|
||||||
|
|
||||||
Building PCRE on non-Unix-like systems
|
Building PCRE on non-Unix-like systems
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
@ -262,9 +269,17 @@ library. They are also documented in the pcrebuild man page.
|
|||||||
|
|
||||||
on the "configure" command.
|
on the "configure" command.
|
||||||
|
|
||||||
. PCRE has a counter that can be set to limit the amount of resources it uses.
|
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||||
If the limit is exceeded during a match, the match fails. The default is ten
|
pattern. This limits the amount of system stack that a pattern uses when it
|
||||||
million. You can change the default by setting, for example,
|
is compiled. The default is 250, but you can change it by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
|
--with-parens-nest-limit=500
|
||||||
|
|
||||||
|
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||||
|
when matching a pattern. If the limit is exceeded during a match, the match
|
||||||
|
fails. The default is ten million. You can change the default by setting, for
|
||||||
|
example,
|
||||||
|
|
||||||
--with-match-limit=500000
|
--with-match-limit=500000
|
||||||
|
|
||||||
@ -344,7 +359,8 @@ library. They are also documented in the pcrebuild man page.
|
|||||||
report is generated by running "make coverage". If ccache is installed on
|
report is generated by running "make coverage". If ccache is installed on
|
||||||
your system, it must be disabled when building PCRE for coverage reporting.
|
your system, it must be disabled when building PCRE for coverage reporting.
|
||||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||||
running "make" to build PCRE.
|
running "make" to build PCRE. There is more information about coverage
|
||||||
|
reporting in the "pcrebuild" documentation.
|
||||||
|
|
||||||
. The pcregrep program currently supports only 8-bit data files, and so
|
. The pcregrep program currently supports only 8-bit data files, and so
|
||||||
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||||
@ -971,4 +987,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
|||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 28 April 2013
|
Last updated: 05 November 2013
|
||||||
|
@ -23,8 +23,8 @@ man page, in case the conversion went wrong.
|
|||||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
|
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
|
||||||
<b>[--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
|
<b> [--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
|
||||||
<b>[--cflags] [--cflags-posix]</b>
|
<b> [--cflags] [--cflags-posix]</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -62,7 +62,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
|||||||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||||
release 6.2.0.
|
release 6.3.0.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In addition to the Perl-compatible matching function, PCRE contains an
|
In addition to the Perl-compatible matching function, PCRE contains an
|
||||||
|
@ -42,126 +42,126 @@ man page, in case the conversion went wrong.
|
|||||||
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
|
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>name</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
|
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const unsigned char *pcre16_maketables(void);</b>
|
<b>const unsigned char *pcre16_maketables(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const char *pcre16_version(void);</b>
|
<b>const char *pcre16_version(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||||
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>void *(*pcre16_malloc)(size_t);</b>
|
<b>void *(*pcre16_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre16_free)(void *);</b>
|
<b>void (*pcre16_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void *(*pcre16_stack_malloc)(size_t);</b>
|
<b>void *(*pcre16_stack_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre16_stack_free)(void *);</b>
|
<b>void (*pcre16_stack_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
|
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||||
<b>int <i>keep_boms</i>);</b>
|
<b> int <i>keep_boms</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
|
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -42,126 +42,125 @@ man page, in case the conversion went wrong.
|
|||||||
<br><a name="SEC1" href="#TOC1">PCRE 32-BIT API BASIC FUNCTIONS</a><br>
|
<br><a name="SEC1" href="#TOC1">PCRE 32-BIT API BASIC FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<br>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
|
||||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
|
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
<br><a name="SEC2" href="#TOC1">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>name</i>);</b>
|
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
|
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
|
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">PCRE 32-BIT API AUXILIARY FUNCTIONS</a><br>
|
<br><a name="SEC3" href="#TOC1">PCRE 32-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
|
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const unsigned char *pcre32_maketables(void);</b>
|
<b>const unsigned char *pcre32_maketables(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
|
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const char *pcre32_version(void);</b>
|
<b>const char *pcre32_version(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||||
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">PCRE 32-BIT API INDIRECTED FUNCTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">PCRE 32-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>void *(*pcre32_malloc)(size_t);</b>
|
<b>void *(*pcre32_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre32_free)(void *);</b>
|
<b>void (*pcre32_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void *(*pcre32_stack_malloc)(size_t);</b>
|
<b>void *(*pcre32_stack_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre32_stack_free)(void *);</b>
|
<b>void (*pcre32_stack_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
|
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a><br>
|
<br><a name="SEC5" href="#TOC1">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||||
<b>int <i>keep_boms</i>);</b>
|
<b> int <i>keep_boms</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">THE PCRE 32-BIT LIBRARY</a><br>
|
<br><a name="SEC6" href="#TOC1">THE PCRE 32-BIT LIBRARY</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||||
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
@ -65,6 +65,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -73,6 +74,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
@ -69,6 +69,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -77,6 +78,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -48,6 +48,7 @@ point to an unsigned long integer. The available codes are:
|
|||||||
target architecture for the JIT compiler,
|
target architecture for the JIT compiler,
|
||||||
or NULL if there is no JIT support
|
or NULL if there is no JIT support
|
||||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||||
|
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||||
Internal recursion depth limit
|
Internal recursion depth limit
|
||||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>);</b>
|
<b> const char *<i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>name</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>name</i>);</b>
|
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
|
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
<b>int <i>maxsize</i>);</b>
|
<b> int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
|
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
<b>int <i>maxsize</i>);</b>
|
<b> int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
|
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||||
<b>int <i>maxsize</i>);</b>
|
<b> int <i>maxsize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||||
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||||
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||||
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,8 +20,8 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||||
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||||
<b>int <i>keep_boms</i>);</b>
|
<b> int <i>keep_boms</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -20,8 +20,8 @@ SYNOPSIS
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||||
<b>PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||||
<b>int <i>keep_boms</i>);</b>
|
<b> int <i>keep_boms</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -46,125 +46,125 @@ man page, in case the conversion went wrong.
|
|||||||
<br><a name="SEC1" href="#TOC1">PCRE NATIVE API BASIC FUNCTIONS</a><br>
|
<br><a name="SEC1" href="#TOC1">PCRE NATIVE API BASIC FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
|
<b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">PCRE NATIVE API STRING EXTRACTION FUNCTIONS</a><br>
|
<br><a name="SEC2" href="#TOC1">PCRE NATIVE API STRING EXTRACTION FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>);</b>
|
<b> const char *<i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>
|
<br><a name="SEC3" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
|
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||||
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const unsigned char *pcre_maketables(void);</b>
|
<b>const unsigned char *pcre_maketables(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>const char *pcre_version(void);</b>
|
<b>const char *pcre_version(void);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||||
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>void *(*pcre_malloc)(size_t);</b>
|
<b>void *(*pcre_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre_free)(void *);</b>
|
<b>void (*pcre_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void *(*pcre_stack_malloc)(size_t);</b>
|
<b>void *(*pcre_stack_malloc)(size_t);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>void (*pcre_stack_free)(void *);</b>
|
<b>void (*pcre_stack_free)(void *);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
<br><a name="SEC5" href="#TOC1">PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||||
@ -483,6 +483,13 @@ interface uses <b>malloc()</b> for output vectors. Further details are given in
|
|||||||
the
|
the
|
||||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
|
<pre>
|
||||||
|
PCRE_CONFIG_PARENS_LIMIT
|
||||||
|
</pre>
|
||||||
|
The output is a long integer that gives the maximum depth of nesting of
|
||||||
|
parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
|
||||||
|
of system stack used when a pattern is compiled. It is specified when PCRE is
|
||||||
|
built; the default is 250.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_CONFIG_MATCH_LIMIT
|
PCRE_CONFIG_MATCH_LIMIT
|
||||||
</pre>
|
</pre>
|
||||||
@ -509,12 +516,14 @@ avoiding the use of the stack.
|
|||||||
<br><a name="SEC11" href="#TOC1">COMPILING A PATTERN</a><br>
|
<br><a name="SEC11" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||||
<b>int *<i>errorcodeptr</i>,</b>
|
<b> int *<i>errorcodeptr</i>,</b>
|
||||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Either of the functions <b>pcre_compile()</b> or <b>pcre_compile2()</b> can be
|
Either of the functions <b>pcre_compile()</b> or <b>pcre_compile2()</b> can be
|
||||||
@ -580,8 +589,9 @@ If the final argument, <i>tableptr</i>, is NULL, PCRE uses a default set of
|
|||||||
character tables that are built when PCRE is compiled, using the default C
|
character tables that are built when PCRE is compiled, using the default C
|
||||||
locale. Otherwise, <i>tableptr</i> must be an address that is the result of a
|
locale. Otherwise, <i>tableptr</i> must be an address that is the result of a
|
||||||
call to <b>pcre_maketables()</b>. This value is stored with the compiled
|
call to <b>pcre_maketables()</b>. This value is stored with the compiled
|
||||||
pattern, and used again by <b>pcre_exec()</b>, unless another table pointer is
|
pattern, and used again by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> when the
|
||||||
passed to it. For more discussion, see the section on locale support below.
|
pattern is matched. For more discussion, see the section on locale support
|
||||||
|
below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This code fragment shows a typical straightforward call to <b>pcre_compile()</b>:
|
This code fragment shows a typical straightforward call to <b>pcre_compile()</b>:
|
||||||
@ -666,12 +676,24 @@ documentation.
|
|||||||
<pre>
|
<pre>
|
||||||
PCRE_EXTENDED
|
PCRE_EXTENDED
|
||||||
</pre>
|
</pre>
|
||||||
If this bit is set, white space data characters in the pattern are totally
|
If this bit is set, most white space characters in the pattern are totally
|
||||||
ignored except when escaped or inside a character class. White space does not
|
ignored except when escaped or inside a character class. However, white space
|
||||||
include the VT character (code 11). In addition, characters between an
|
is not allowed within sequences such as (?> that introduce various
|
||||||
unescaped # outside a character class and the next newline, inclusive, are also
|
parenthesized subpatterns, nor within a numerical quantifier such as {1,3}.
|
||||||
ignored. This is equivalent to Perl's /x option, and it can be changed within a
|
However, ignorable white space is permitted between an item and a following
|
||||||
pattern by a (?x) option setting.
|
quantifier and between a quantifier and a following + that indicates
|
||||||
|
possessiveness.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
White space did not formerly include the VT character (code 11), because Perl
|
||||||
|
did not treat this character as white space. However, Perl changed at release
|
||||||
|
5.18, so PCRE followed at release 8.34, and VT is now treated as white space.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
PCRE_EXTENDED also causes characters between an unescaped # outside a character
|
||||||
|
class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is
|
||||||
|
equivalent to Perl's /x option, and it can be changed within a pattern by a
|
||||||
|
(?x) option setting.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Which characters are interpreted as newlines is controlled by the options
|
Which characters are interpreted as newlines is controlled by the options
|
||||||
@ -824,6 +846,15 @@ the pattern. Any opening parenthesis that is not followed by ? behaves as if it
|
|||||||
were followed by ?: but named parentheses can still be used for capturing (and
|
were followed by ?: but named parentheses can still be used for capturing (and
|
||||||
they acquire numbers in the usual way). There is no equivalent of this option
|
they acquire numbers in the usual way). There is no equivalent of this option
|
||||||
in Perl.
|
in Perl.
|
||||||
|
<pre>
|
||||||
|
PCRE_NO_AUTO_POSSESS
|
||||||
|
</pre>
|
||||||
|
If this option is set, it disables "auto-possessification". This is an
|
||||||
|
optimization that, for example, turns a+b into a++b in order to avoid
|
||||||
|
backtracks into a+ that can never be successful. However, if callouts are in
|
||||||
|
use, auto-possessification means that some of them are never taken. You can set
|
||||||
|
this option if you want the matching functions to do a full unoptimized search
|
||||||
|
and run all the callouts, but it is mainly provided for testing purposes.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_NO_START_OPTIMIZE
|
PCRE_NO_START_OPTIMIZE
|
||||||
</pre>
|
</pre>
|
||||||
@ -875,10 +906,10 @@ page. If an invalid UTF-8 sequence is found, <b>pcre_compile()</b> returns an
|
|||||||
error. If you already know that your pattern is valid, and you want to skip
|
error. If you already know that your pattern is valid, and you want to skip
|
||||||
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
||||||
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
||||||
undefined. It may cause your program to crash. Note that this option can also
|
undefined. It may cause your program to crash or loop. Note that this option
|
||||||
be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress the
|
can also be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress
|
||||||
validity checking of subject strings only. If the same string is being matched
|
the validity checking of subject strings only. If the same string is being
|
||||||
many times, the option can be safely set for the second and subsequent
|
matched many times, the option can be safely set for the second and subsequent
|
||||||
matchings to improve performance.
|
matchings to improve performance.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
<br><a name="SEC12" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||||
@ -923,7 +954,7 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
|||||||
31 POSIX collating elements are not supported
|
31 POSIX collating elements are not supported
|
||||||
32 this version of PCRE is compiled without UTF support
|
32 this version of PCRE is compiled without UTF support
|
||||||
33 [this code is not in use]
|
33 [this code is not in use]
|
||||||
34 character value in \x{...} sequence is too large
|
34 character value in \x{} or \o{} is too large
|
||||||
35 invalid condition (?(0)
|
35 invalid condition (?(0)
|
||||||
36 \C not allowed in lookbehind assertion
|
36 \C not allowed in lookbehind assertion
|
||||||
37 PCRE does not support \L, \l, \N{name}, \U, or \u
|
37 PCRE does not support \L, \l, \N{name}, \U, or \u
|
||||||
@ -971,14 +1002,20 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
|||||||
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||||
76 character value in \u.... sequence is too large
|
76 character value in \u.... sequence is too large
|
||||||
77 invalid UTF-32 string (specifically UTF-32)
|
77 invalid UTF-32 string (specifically UTF-32)
|
||||||
|
78 setting UTF is disabled by the application
|
||||||
|
79 non-hex character in \x{} (closing brace missing?)
|
||||||
|
80 non-octal character in \o{} (closing brace missing?)
|
||||||
|
81 missing opening brace after \o
|
||||||
|
82 parentheses are too deeply nested
|
||||||
|
83 invalid range in character class
|
||||||
</pre>
|
</pre>
|
||||||
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
||||||
be used if the limits were changed when PCRE was built.
|
be used if the limits were changed when PCRE was built.
|
||||||
<a name="studyingapattern"></a></P>
|
<a name="studyingapattern"></a></P>
|
||||||
<br><a name="SEC13" href="#TOC1">STUDYING A PATTERN</a><br>
|
<br><a name="SEC13" href="#TOC1">STUDYING A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i></b>
|
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||||
<b>const char **<i>errptr</i>);</b>
|
<b> const char **<i>errptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If a compiled pattern is going to be used several times, it is worth spending
|
If a compiled pattern is going to be used several times, it is worth spending
|
||||||
@ -1101,15 +1138,18 @@ There is a longer discussion of PCRE_NO_START_OPTIMIZE
|
|||||||
<P>
|
<P>
|
||||||
PCRE handles caseless matching, and determines whether characters are letters,
|
PCRE handles caseless matching, and determines whether characters are letters,
|
||||||
digits, or whatever, by reference to a set of tables, indexed by character
|
digits, or whatever, by reference to a set of tables, indexed by character
|
||||||
value. When running in UTF-8 mode, this applies only to characters
|
code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this
|
||||||
with codes less than 128. By default, higher-valued codes never match escapes
|
applies only to characters with code points less than 256. By default,
|
||||||
such as \w or \d, but they can be tested with \p if PCRE is built with
|
higher-valued code points never match escapes such as \w or \d. However, if
|
||||||
Unicode character property support. Alternatively, the PCRE_UCP option can be
|
PCRE is built with Unicode property support, all characters can be tested with
|
||||||
set at compile time; this causes \w and friends to use Unicode property
|
\p and \P, or, alternatively, the PCRE_UCP option can be set when a pattern
|
||||||
support instead of built-in tables. The use of locales with Unicode is
|
is compiled; this causes \w and friends to use Unicode property support
|
||||||
discouraged. If you are handling characters with codes greater than 128, you
|
instead of the built-in tables.
|
||||||
should either use UTF-8 and Unicode, or use locales, but not try to mix the
|
</P>
|
||||||
two.
|
<P>
|
||||||
|
The use of locales with Unicode is discouraged. If you are handling characters
|
||||||
|
with code points greater than 128, you should either use Unicode support, or
|
||||||
|
use locales, but not try to mix the two.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
PCRE contains an internal set of tables that are used when the final argument
|
PCRE contains an internal set of tables that are used when the final argument
|
||||||
@ -1127,10 +1167,10 @@ for this locale support is expected to die away.
|
|||||||
<P>
|
<P>
|
||||||
External tables are built by calling the <b>pcre_maketables()</b> function,
|
External tables are built by calling the <b>pcre_maketables()</b> function,
|
||||||
which has no arguments, in the relevant locale. The result can then be passed
|
which has no arguments, in the relevant locale. The result can then be passed
|
||||||
to <b>pcre_compile()</b> or <b>pcre_exec()</b> as often as necessary. For
|
to <b>pcre_compile()</b> as often as necessary. For example, to build and use
|
||||||
example, to build and use tables that are appropriate for the French locale
|
tables that are appropriate for the French locale (where accented characters
|
||||||
(where accented characters with values greater than 128 are treated as letters),
|
with values greater than 128 are treated as letters), the following code could
|
||||||
the following code could be used:
|
be used:
|
||||||
<pre>
|
<pre>
|
||||||
setlocale(LC_CTYPE, "fr_FR");
|
setlocale(LC_CTYPE, "fr_FR");
|
||||||
tables = pcre_maketables();
|
tables = pcre_maketables();
|
||||||
@ -1148,21 +1188,25 @@ needed.
|
|||||||
<P>
|
<P>
|
||||||
The pointer that is passed to <b>pcre_compile()</b> is saved with the compiled
|
The pointer that is passed to <b>pcre_compile()</b> is saved with the compiled
|
||||||
pattern, and the same tables are used via this pointer by <b>pcre_study()</b>
|
pattern, and the same tables are used via this pointer by <b>pcre_study()</b>
|
||||||
and normally also by <b>pcre_exec()</b>. Thus, by default, for any single
|
and also by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>. Thus, for any single
|
||||||
pattern, compilation, studying and matching all happen in the same locale, but
|
pattern, compilation, studying and matching all happen in the same locale, but
|
||||||
different patterns can be compiled in different locales.
|
different patterns can be processed in different locales.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
It is possible to pass a table pointer or NULL (indicating the use of the
|
It is possible to pass a table pointer or NULL (indicating the use of the
|
||||||
internal tables) to <b>pcre_exec()</b>. Although not intended for this purpose,
|
internal tables) to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> (see the
|
||||||
this facility could be used to match a pattern in a different locale from the
|
discussion below in the section on matching a pattern). This facility is
|
||||||
one in which it was compiled. Passing table pointers at run time is discussed
|
provided for use with pre-compiled patterns that have been saved and reloaded.
|
||||||
below in the section on matching a pattern.
|
Character tables are not saved with patterns, so if a non-standard table was
|
||||||
|
used at compile time, it must be provided again when the reloaded pattern is
|
||||||
|
matched. Attempting to use this facility to match a pattern in a different
|
||||||
|
locale from the one in which it was compiled is likely to lead to anomalous
|
||||||
|
(usually incorrect) results.
|
||||||
<a name="infoaboutpattern"></a></P>
|
<a name="infoaboutpattern"></a></P>
|
||||||
<br><a name="SEC15" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
|
<br><a name="SEC15" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre_fullinfo()</b> function returns information about a compiled
|
The <b>pcre_fullinfo()</b> function returns information about a compiled
|
||||||
@ -1303,9 +1347,14 @@ is -1.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
||||||
to return the full 32-bit range of the character, this value is deprecated;
|
to return the full 32-bit range of characters, this value is deprecated;
|
||||||
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
||||||
be used.
|
be used.
|
||||||
|
<pre>
|
||||||
|
PCRE_INFO_MATCH_EMPTY
|
||||||
|
</pre>
|
||||||
|
Return 1 if the pattern can match an empty string, otherwise 0. The fourth
|
||||||
|
argument should point to an <b>int</b> variable.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_MATCHLIMIT
|
PCRE_INFO_MATCHLIMIT
|
||||||
</pre>
|
</pre>
|
||||||
@ -1364,16 +1413,18 @@ contains the parenthesis number. The rest of the entry is the corresponding
|
|||||||
name, zero terminated.
|
name, zero terminated.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The names are in alphabetical order. Duplicate names may appear if (?| is used
|
The names are in alphabetical order. If (?| is used to create multiple groups
|
||||||
to create multiple groups with the same number, as described in the
|
with the same number, as described in the
|
||||||
<a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
|
<a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||||
page. Duplicate names for subpatterns with different numbers are permitted only
|
page, the groups may be given the same name, but there is only one entry in the
|
||||||
if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
|
table. Different names for groups of the same number are not permitted.
|
||||||
table in the order in which they were found in the pattern. In the absence of
|
Duplicate names for subpatterns with different numbers are permitted,
|
||||||
(?| this is the order of increasing number; when (?| is used this is not
|
but only if PCRE_DUPNAMES is set. They appear in the table in the order in
|
||||||
necessarily the case because later subpatterns may have lower numbers.
|
which they were found in the pattern. In the absence of (?| this is the order
|
||||||
|
of increasing number; when (?| is used this is not necessarily the case because
|
||||||
|
later subpatterns may have lower numbers.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
As a simple example of the name/number table, consider the following pattern
|
As a simple example of the name/number table, consider the following pattern
|
||||||
@ -1487,30 +1538,14 @@ returned. For anchored patterns, 0 is returned.
|
|||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_FIRSTCHARACTER
|
PCRE_INFO_FIRSTCHARACTER
|
||||||
</pre>
|
</pre>
|
||||||
Return the fixed first character value, if PCRE_INFO_FIRSTCHARACTERFLAGS
|
Return the fixed first character value in the situation where
|
||||||
returned 1; otherwise returns 0. The fourth argument should point to an
|
PCRE_INFO_FIRSTCHARACTERFLAGS returns 1; otherwise return 0. The fourth
|
||||||
<b>uint_t</b> variable.
|
argument should point to a <b>uint_t</b> variable.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
||||||
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
||||||
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If there is no fixed first value, and if either
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
|
|
||||||
starts with "^", or
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
|
|
||||||
(if it were set, the pattern would be anchored),
|
|
||||||
<br>
|
|
||||||
<br>
|
|
||||||
-1 is returned, indicating that the pattern matches only at the start of a
|
|
||||||
subject string or after any newline within the string. Otherwise -2 is
|
|
||||||
returned. For anchored patterns, -2 is returned.
|
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_INFO_REQUIREDCHARFLAGS
|
PCRE_INFO_REQUIREDCHARFLAGS
|
||||||
</pre>
|
</pre>
|
||||||
@ -1559,8 +1594,8 @@ is different. (This seems a highly unlikely scenario.)
|
|||||||
<br><a name="SEC17" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
<br><a name="SEC17" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The function <b>pcre_exec()</b> is called to match a subject string against a
|
The function <b>pcre_exec()</b> is called to match a subject string against a
|
||||||
@ -1723,17 +1758,22 @@ and is described in the
|
|||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>tables</i> field is used to pass a character tables pointer to
|
The <i>tables</i> field is provided for use with patterns that have been
|
||||||
<b>pcre_exec()</b>; this overrides the value that is stored with the compiled
|
pre-compiled using custom character tables, saved to disc or elsewhere, and
|
||||||
pattern. A non-NULL value is stored with the compiled pattern only if custom
|
then reloaded, because the tables that were used to compile a pattern are not
|
||||||
tables were supplied to <b>pcre_compile()</b> via its <i>tableptr</i> argument.
|
saved with it. See the
|
||||||
If NULL is passed to <b>pcre_exec()</b> using this mechanism, it forces PCRE's
|
|
||||||
internal tables to be used. This facility is helpful when re-using patterns
|
|
||||||
that have been saved after compiling with an external set of tables, because
|
|
||||||
the external tables might be at a different address when <b>pcre_exec()</b> is
|
|
||||||
called. See the
|
|
||||||
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
||||||
documentation for a discussion of saving compiled patterns for later use.
|
documentation for a discussion of saving compiled patterns for later use. If
|
||||||
|
NULL is passed using this mechanism, it forces PCRE's internal tables to be
|
||||||
|
used.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>Warning:</b> The tables that <b>pcre_exec()</b> uses must be the same as those
|
||||||
|
that were used when the pattern was compiled. If this is not the case, the
|
||||||
|
behaviour of <b>pcre_exec()</b> is undefined. Therefore, when a pattern is
|
||||||
|
compiled and matched in the same process, this field should never be set. In
|
||||||
|
this (the most common) case, the correct table pointer is automatically passed
|
||||||
|
with the compiled pattern from <b>pcre_compile()</b> to <b>pcre_exec()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must
|
If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must
|
||||||
@ -1951,7 +1991,7 @@ all the matches in a single subject string. However, you should be sure that
|
|||||||
the value of <i>startoffset</i> points to the start of a character (or the end
|
the value of <i>startoffset</i> points to the start of a character (or the end
|
||||||
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
||||||
invalid string as a subject or an invalid value of <i>startoffset</i> is
|
invalid string as a subject or an invalid value of <i>startoffset</i> is
|
||||||
undefined. Your program may crash.
|
undefined. Your program may crash or loop.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE_PARTIAL_HARD
|
PCRE_PARTIAL_HARD
|
||||||
PCRE_PARTIAL_SOFT
|
PCRE_PARTIAL_SOFT
|
||||||
@ -2413,17 +2453,17 @@ no longer in use and is never returned.
|
|||||||
<br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
<br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||||
<b>int <i>buffersize</i>);</b>
|
<b> int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Captured substrings can be accessed directly by using the offsets returned by
|
Captured substrings can be accessed directly by using the offsets returned by
|
||||||
@ -2508,19 +2548,19 @@ provided.
|
|||||||
<br><a name="SEC19" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
<br><a name="SEC19" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>);</b>
|
<b> const char *<i>name</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||||
<b>const char **<i>stringptr</i>);</b>
|
<b> const char **<i>stringptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
To extract a substring by name, you first have to find the associated number.
|
To extract a substring by name, you first have to find the associated number.
|
||||||
@ -2572,7 +2612,7 @@ same number causes an error at compile time.
|
|||||||
<br><a name="SEC20" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
<br><a name="SEC20" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
|
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
|
||||||
@ -2653,9 +2693,9 @@ the value returned is the size of each block that is obtained from the heap.
|
|||||||
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The function <b>pcre_dfa_exec()</b> is called to match a subject string against
|
The function <b>pcre_dfa_exec()</b> is called to match a subject string against
|
||||||
@ -2784,6 +2824,15 @@ matching string is given first. If there were too many matches to fit into
|
|||||||
the longest matches. Unlike <b>pcre_exec()</b>, <b>pcre_dfa_exec()</b> can use
|
the longest matches. Unlike <b>pcre_exec()</b>, <b>pcre_dfa_exec()</b> can use
|
||||||
the entire <i>ovector</i> for returning matched strings.
|
the entire <i>ovector</i> for returning matched strings.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
NOTE: PCRE's "auto-possessification" optimization usually applies to character
|
||||||
|
repeats at the end of a pattern (as well as internally). For example, the
|
||||||
|
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
|
||||||
|
even considering the possibility of backtracking into the repeated digits. For
|
||||||
|
DFA matching, this means that only one possible match is found. If you really
|
||||||
|
do want multiple matches in such cases, either use an ungreedy repeat
|
||||||
|
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Error returns from <b>pcre_dfa_exec()</b>
|
Error returns from <b>pcre_dfa_exec()</b>
|
||||||
</b><br>
|
</b><br>
|
||||||
@ -2850,7 +2899,7 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 12 May 2013
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2013 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -77,15 +77,50 @@ independent groups).
|
|||||||
Automatic callouts can be used for tracking the progress of pattern matching.
|
Automatic callouts can be used for tracking the progress of pattern matching.
|
||||||
The
|
The
|
||||||
<a href="pcretest.html"><b>pcretest</b></a>
|
<a href="pcretest.html"><b>pcretest</b></a>
|
||||||
command has an option that sets automatic callouts; when it is used, the output
|
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||||
indicates how the pattern is matched. This is useful information when you are
|
used, the output indicates how the pattern is being matched. This is useful
|
||||||
trying to optimize the performance of a particular pattern.
|
information when you are trying to optimize the performance of a particular
|
||||||
|
pattern.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
You should be aware that, because of optimizations in the way PCRE matches
|
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||||
patterns by default, callouts sometimes do not happen. For example, if the
|
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||||
pattern is
|
</P>
|
||||||
|
<P>
|
||||||
|
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||||
|
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||||
|
if it were a++[bc]. The <b>pcretest</b> output when this pattern is anchored and
|
||||||
|
then applied with automatic callouts to the string "aaaa" is:
|
||||||
|
<pre>
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
No match
|
||||||
|
</pre>
|
||||||
|
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||||
|
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||||
|
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||||
|
to <b>pcre_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||||
|
this is done in <b>pcretest</b> (using the /O qualifier), the output changes to
|
||||||
|
this:
|
||||||
|
<pre>
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^^ [bc]
|
||||||
|
No match
|
||||||
|
</pre>
|
||||||
|
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||||
|
again, repeatedly, until a+ itself fails.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Other optimizations that provide fast "no match" results also affect callouts.
|
||||||
|
For example, if the pattern is
|
||||||
<pre>
|
<pre>
|
||||||
ab(?C4)cd
|
ab(?C4)cd
|
||||||
</pre>
|
</pre>
|
||||||
@ -109,11 +144,11 @@ callouts such as the example above are obeyed.
|
|||||||
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||||
<P>
|
<P>
|
||||||
During matching, when PCRE reaches a callout point, the external function
|
During matching, when PCRE reaches a callout point, the external function
|
||||||
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called
|
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called (if it is
|
||||||
(if it is set). This applies to both normal and DFA matching. The only
|
set). This applies to both normal and DFA matching. The only argument to the
|
||||||
argument to the callout function is a pointer to a <b>pcre_callout</b>
|
callout function is a pointer to a <b>pcre_callout</b> or
|
||||||
or <b>pcre[16|32]_callout</b> block.
|
<b>pcre[16|32]_callout</b> block. These structures contain the following
|
||||||
These structures contains the following fields:
|
fields:
|
||||||
<pre>
|
<pre>
|
||||||
int <i>version</i>;
|
int <i>version</i>;
|
||||||
int <i>callout_number</i>;
|
int <i>callout_number</i>;
|
||||||
@ -242,7 +277,7 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 03 March 2013
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2013 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -138,18 +138,24 @@ an error is given at compile time.
|
|||||||
<P>
|
<P>
|
||||||
15. Perl recognizes comments in some places that PCRE does not, for example,
|
15. Perl recognizes comments in some places that PCRE does not, for example,
|
||||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||||
Perl allows white space between ( and ? but PCRE never does, even if the
|
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||||
PCRE_EXTENDED option is set.
|
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
16. In PCRE, the upper/lower case character properties Lu and Ll are not
|
16. Perl, when in warning mode, gives warnings for character classes such as
|
||||||
|
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
|
||||||
|
warning features, so it gives an error in these cases because they are almost
|
||||||
|
certainly user mistakes.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
17. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||||
affected when case-independent matching is specified. For example, \p{Lu}
|
affected when case-independent matching is specified. For example, \p{Lu}
|
||||||
always matches an upper case letter. I think Perl has changed in this respect;
|
always matches an upper case letter. I think Perl has changed in this respect;
|
||||||
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
|
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
|
||||||
letters, regardless of case, when case independence is specified.
|
letters, regardless of case, when case independence is specified.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
17. PCRE provides some extensions to the Perl regular expression facilities.
|
18. PCRE provides some extensions to the Perl regular expression facilities.
|
||||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||||
of which (such as named parentheses) have been in PCRE for some time. This list
|
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||||
is with respect to Perl 5.10:
|
is with respect to Perl 5.10:
|
||||||
@ -220,7 +226,7 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 19 March 2013
|
Last updated: 10 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2013 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -21,9 +21,10 @@ practice be relevant.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
||||||
for the 8-bit library, 32-bit units for the 32-bit library, and 32-bit units for
|
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
|
||||||
the 32-bit library) if PCRE is compiled with the default internal linkage size
|
the 32-bit library) if PCRE is compiled with the default internal linkage size,
|
||||||
of 2 bytes. If you want to process regular expressions that are truly enormous,
|
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
|
||||||
|
library. If you want to process regular expressions that are truly enormous,
|
||||||
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
||||||
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
|
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
|
||||||
the source distribution and the
|
the source distribution and the
|
||||||
@ -36,7 +37,10 @@ All values in repeating quantifiers must be less than 65536.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||||
no more than 65535 capturing subpatterns.
|
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||||
|
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||||
|
order to limit the amount of system stack used at compile time. The limit can
|
||||||
|
be specified when PCRE is built; the default is 250.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a limit to the number of forward references to subsequent subpatterns
|
There is a limit to the number of forward references to subsequent subpatterns
|
||||||
@ -50,7 +54,7 @@ maximum number of named subpatterns is 10000.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit library.
|
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a subject string is the largest positive number that an
|
The maximum length of a subject string is the largest positive number that an
|
||||||
@ -77,9 +81,9 @@ Cambridge CB2 3QH, England.
|
|||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 04 May 2012
|
Last updated: 05 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -126,6 +126,15 @@ character of the subject. The algorithm does not automatically move on to find
|
|||||||
matches that start at later positions.
|
matches that start at later positions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
PCRE's "auto-possessification" optimization usually applies to character
|
||||||
|
repeats at the end of a pattern (as well as internally). For example, the
|
||||||
|
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
|
||||||
|
even considering the possibility of backtracking into the repeated digits. For
|
||||||
|
DFA matching, this means that only one possible match is found. If you really
|
||||||
|
do want multiple matches in such cases, either use an ungreedy repeat
|
||||||
|
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
There are a number of features of PCRE regular expressions that are not
|
There are a number of features of PCRE regular expressions that are not
|
||||||
supported by the alternative matching algorithm. They are as follows:
|
supported by the alternative matching algorithm. They are as follows:
|
||||||
</P>
|
</P>
|
||||||
@ -224,7 +233,7 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 08 January 2012
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2012 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -306,6 +306,16 @@ not retain the previously partially-matched string. It is up to the calling
|
|||||||
program to do that if it needs to.
|
program to do that if it needs to.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
That means that, for an unanchored pattern, if a continued match fails, it is
|
||||||
|
not possible to try again at a new starting point. All this facility is capable
|
||||||
|
of doing is continuing with the previous match attempt. In the previous
|
||||||
|
example, if the second set of data is "ug23" the result is no match, even
|
||||||
|
though there would be a match for "aug23" if the entire string were given at
|
||||||
|
once. Depending on the application, this may or may not be what you want.
|
||||||
|
The only way to allow for starting again at the next character is to retain the
|
||||||
|
matched part of the subject and try a new complete match.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
||||||
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
||||||
facility can be used to pass very long subject strings to the DFA matching
|
facility can be used to pass very long subject strings to the DFA matching
|
||||||
@ -490,7 +500,7 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 20 February 2013
|
Last updated: 02 July 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2013 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -23,25 +23,26 @@ man page, in case the conversion went wrong.
|
|||||||
<li><a name="TOC8" href="#SEC8">MATCHING A SINGLE DATA UNIT</a>
|
<li><a name="TOC8" href="#SEC8">MATCHING A SINGLE DATA UNIT</a>
|
||||||
<li><a name="TOC9" href="#SEC9">SQUARE BRACKETS AND CHARACTER CLASSES</a>
|
<li><a name="TOC9" href="#SEC9">SQUARE BRACKETS AND CHARACTER CLASSES</a>
|
||||||
<li><a name="TOC10" href="#SEC10">POSIX CHARACTER CLASSES</a>
|
<li><a name="TOC10" href="#SEC10">POSIX CHARACTER CLASSES</a>
|
||||||
<li><a name="TOC11" href="#SEC11">VERTICAL BAR</a>
|
<li><a name="TOC11" href="#SEC11">COMPATIBILITY FEATURE FOR WORD BOUNDARIES</a>
|
||||||
<li><a name="TOC12" href="#SEC12">INTERNAL OPTION SETTING</a>
|
<li><a name="TOC12" href="#SEC12">VERTICAL BAR</a>
|
||||||
<li><a name="TOC13" href="#SEC13">SUBPATTERNS</a>
|
<li><a name="TOC13" href="#SEC13">INTERNAL OPTION SETTING</a>
|
||||||
<li><a name="TOC14" href="#SEC14">DUPLICATE SUBPATTERN NUMBERS</a>
|
<li><a name="TOC14" href="#SEC14">SUBPATTERNS</a>
|
||||||
<li><a name="TOC15" href="#SEC15">NAMED SUBPATTERNS</a>
|
<li><a name="TOC15" href="#SEC15">DUPLICATE SUBPATTERN NUMBERS</a>
|
||||||
<li><a name="TOC16" href="#SEC16">REPETITION</a>
|
<li><a name="TOC16" href="#SEC16">NAMED SUBPATTERNS</a>
|
||||||
<li><a name="TOC17" href="#SEC17">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
|
<li><a name="TOC17" href="#SEC17">REPETITION</a>
|
||||||
<li><a name="TOC18" href="#SEC18">BACK REFERENCES</a>
|
<li><a name="TOC18" href="#SEC18">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
|
||||||
<li><a name="TOC19" href="#SEC19">ASSERTIONS</a>
|
<li><a name="TOC19" href="#SEC19">BACK REFERENCES</a>
|
||||||
<li><a name="TOC20" href="#SEC20">CONDITIONAL SUBPATTERNS</a>
|
<li><a name="TOC20" href="#SEC20">ASSERTIONS</a>
|
||||||
<li><a name="TOC21" href="#SEC21">COMMENTS</a>
|
<li><a name="TOC21" href="#SEC21">CONDITIONAL SUBPATTERNS</a>
|
||||||
<li><a name="TOC22" href="#SEC22">RECURSIVE PATTERNS</a>
|
<li><a name="TOC22" href="#SEC22">COMMENTS</a>
|
||||||
<li><a name="TOC23" href="#SEC23">SUBPATTERNS AS SUBROUTINES</a>
|
<li><a name="TOC23" href="#SEC23">RECURSIVE PATTERNS</a>
|
||||||
<li><a name="TOC24" href="#SEC24">ONIGURUMA SUBROUTINE SYNTAX</a>
|
<li><a name="TOC24" href="#SEC24">SUBPATTERNS AS SUBROUTINES</a>
|
||||||
<li><a name="TOC25" href="#SEC25">CALLOUTS</a>
|
<li><a name="TOC25" href="#SEC25">ONIGURUMA SUBROUTINE SYNTAX</a>
|
||||||
<li><a name="TOC26" href="#SEC26">BACKTRACKING CONTROL</a>
|
<li><a name="TOC26" href="#SEC26">CALLOUTS</a>
|
||||||
<li><a name="TOC27" href="#SEC27">SEE ALSO</a>
|
<li><a name="TOC27" href="#SEC27">BACKTRACKING CONTROL</a>
|
||||||
<li><a name="TOC28" href="#SEC28">AUTHOR</a>
|
<li><a name="TOC28" href="#SEC28">SEE ALSO</a>
|
||||||
<li><a name="TOC29" href="#SEC29">REVISION</a>
|
<li><a name="TOC29" href="#SEC29">AUTHOR</a>
|
||||||
|
<li><a name="TOC30" href="#SEC30">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION DETAILS</a><br>
|
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION DETAILS</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -116,21 +117,33 @@ appearance causes an error.
|
|||||||
Unicode property support
|
Unicode property support
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Another special sequence that may appear at the start of a pattern is
|
Another special sequence that may appear at the start of a pattern is (*UCP).
|
||||||
<pre>
|
|
||||||
(*UCP)
|
|
||||||
</pre>
|
|
||||||
This has the same effect as setting the PCRE_UCP option: it causes sequences
|
This has the same effect as setting the PCRE_UCP option: it causes sequences
|
||||||
such as \d and \w to use Unicode properties to determine character types,
|
such as \d and \w to use Unicode properties to determine character types,
|
||||||
instead of recognizing only characters with codes less than 128 via a lookup
|
instead of recognizing only characters with codes less than 128 via a lookup
|
||||||
table.
|
table.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
Disabling auto-possessification
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting
|
||||||
|
the PCRE_NO_AUTO_POSSESS option at compile time. This stops PCRE from making
|
||||||
|
quantifiers possessive when what follows cannot match the repeated item. For
|
||||||
|
example, by default a+b is treated as a++b. For more details, see the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
documentation.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
Disabling start-up optimizations
|
Disabling start-up optimizations
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
If a pattern starts with (*NO_START_OPT), it has the same effect as setting the
|
If a pattern starts with (*NO_START_OPT), it has the same effect as setting the
|
||||||
PCRE_NO_START_OPTIMIZE option either at compile or matching time.
|
PCRE_NO_START_OPTIMIZE option either at compile or matching time. This disables
|
||||||
|
several optimizations for quickly reaching "no match" results. For more
|
||||||
|
details, see the
|
||||||
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
|
documentation.
|
||||||
<a name="newlines"></a></P>
|
<a name="newlines"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Newline conventions
|
Newline conventions
|
||||||
@ -193,10 +206,10 @@ pattern of the form
|
|||||||
(*LIMIT_RECURSION=d)
|
(*LIMIT_RECURSION=d)
|
||||||
</pre>
|
</pre>
|
||||||
where d is any number of decimal digits. However, the value of the setting must
|
where d is any number of decimal digits. However, the value of the setting must
|
||||||
be less than the value set by the caller of <b>pcre_exec()</b> for it to have
|
be less than the value set (or defaulted) by the caller of <b>pcre_exec()</b>
|
||||||
any effect. In other words, the pattern writer can lower the limit set by the
|
for it to have any effect. In other words, the pattern writer can lower the
|
||||||
programmer, but not raise it. If there is more than one setting of one of these
|
limits set by the programmer, but not raise them. If there is more than one
|
||||||
limits, the lower value is used.
|
setting of one of these limits, the lower value is used.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">EBCDIC CHARACTER CODES</a><br>
|
<br><a name="SEC3" href="#TOC1">EBCDIC CHARACTER CODES</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -283,10 +296,11 @@ backslash. All other characters (in particular, those whose codepoints are
|
|||||||
greater than 127) are treated as literals.
|
greater than 127) are treated as literals.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If a pattern is compiled with the PCRE_EXTENDED option, white space in the
|
If a pattern is compiled with the PCRE_EXTENDED option, most white space in the
|
||||||
pattern (other than in a character class) and characters between a # outside
|
pattern (other than in a character class), and characters between a # outside a
|
||||||
a character class and the next newline are ignored. An escaping backslash can
|
character class and the next newline, inclusive, are ignored. An escaping
|
||||||
be used to include a white space or # character as part of the pattern.
|
backslash can be used to include a white space or # character as part of the
|
||||||
|
pattern.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you want to remove the special meaning from a sequence of characters, you
|
If you want to remove the special meaning from a sequence of characters, you
|
||||||
@ -324,7 +338,9 @@ one of the following escape sequences than the binary character it represents:
|
|||||||
\n linefeed (hex 0A)
|
\n linefeed (hex 0A)
|
||||||
\r carriage return (hex 0D)
|
\r carriage return (hex 0D)
|
||||||
\t tab (hex 09)
|
\t tab (hex 09)
|
||||||
|
\0dd character with octal code 0dd
|
||||||
\ddd character with octal code ddd, or back reference
|
\ddd character with octal code ddd, or back reference
|
||||||
|
\o{ddd..} character with octal code ddd..
|
||||||
\xhh character with hex code hh
|
\xhh character with hex code hh
|
||||||
\x{hhh..} character with hex code hhh.. (non-JavaScript mode)
|
\x{hhh..} character with hex code hhh.. (non-JavaScript mode)
|
||||||
\uhhhh character with hex code hhhh (JavaScript mode only)
|
\uhhhh character with hex code hhhh (JavaScript mode only)
|
||||||
@ -347,42 +363,6 @@ the EBCDIC letters are disjoint, \cZ becomes hex 29 (Z is E9), and other
|
|||||||
characters also generate different values.
|
characters also generate different values.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, after \x, from zero to two hexadecimal digits are read (letters
|
|
||||||
can be in upper or lower case). Any number of hexadecimal digits may appear
|
|
||||||
between \x{ and }, but the character code is constrained as follows:
|
|
||||||
<pre>
|
|
||||||
8-bit non-UTF mode less than 0x100
|
|
||||||
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
|
||||||
16-bit non-UTF mode less than 0x10000
|
|
||||||
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
|
||||||
32-bit non-UTF mode less than 0x80000000
|
|
||||||
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
|
||||||
</pre>
|
|
||||||
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
|
||||||
"surrogate" codepoints), and 0xffef.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If characters other than hexadecimal digits appear between \x{ and }, or if
|
|
||||||
there is no terminating }, this form of escape is not recognized. Instead, the
|
|
||||||
initial \x will be interpreted as a basic hexadecimal escape, with no
|
|
||||||
following digits, giving a character whose value is zero.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \x is
|
|
||||||
as just described only when it is followed by two hexadecimal digits.
|
|
||||||
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
|
||||||
code points greater than 256 is provided by \u, which must be followed by
|
|
||||||
four hexadecimal digits; otherwise it matches a literal "u" character.
|
|
||||||
Character codes specified by \u in JavaScript mode are constrained in the same
|
|
||||||
way as those specified by \x in non-JavaScript mode.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Characters whose value is less than 256 can be defined by either of the two
|
|
||||||
syntaxes for \x (or by \u in JavaScript mode). There is no difference in the
|
|
||||||
way they are handled. For example, \xdc is exactly the same as \x{dc} (or
|
|
||||||
\u00dc in JavaScript mode).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
After \0 up to two further octal digits are read. If there are fewer than two
|
After \0 up to two further octal digits are read. If there are fewer than two
|
||||||
digits, just those that are present are used. Thus the sequence \0\x\07
|
digits, just those that are present are used. Thus the sequence \0\x\07
|
||||||
specifies two binary zeros followed by a BEL character (code value 7). Make
|
specifies two binary zeros followed by a BEL character (code value 7). Make
|
||||||
@ -390,9 +370,23 @@ sure you supply two digits after the initial zero if the pattern character that
|
|||||||
follows is itself an octal digit.
|
follows is itself an octal digit.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The handling of a backslash followed by a digit other than 0 is complicated.
|
The escape \o must be followed by a sequence of octal digits, enclosed in
|
||||||
Outside a character class, PCRE reads it and any following digits as a decimal
|
braces. An error occurs if this is not the case. This escape is a recent
|
||||||
number. If the number is less than 10, or if there have been at least that many
|
addition to Perl; it provides a way of specifying character code points as octal
|
||||||
|
numbers greater than 0777, and it also allows octal numbers and back references
|
||||||
|
to be unambiguously specified.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
For greater clarity and unambiguity, it is best to avoid following \ by a
|
||||||
|
digit greater than zero. Instead, use \o{} or \x{} to specify character
|
||||||
|
numbers, and \g{} to specify back references. The following paragraphs
|
||||||
|
describe the old, ambiguous syntax.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The handling of a backslash followed by a digit other than 0 is complicated,
|
||||||
|
and Perl has changed in recent releases, causing PCRE also to change. Outside a
|
||||||
|
character class, PCRE reads the digit and any following digits as a decimal
|
||||||
|
number. If the number is less than 8, or if there have been at least that many
|
||||||
previous capturing left parentheses in the expression, the entire sequence is
|
previous capturing left parentheses in the expression, the entire sequence is
|
||||||
taken as a <i>back reference</i>. A description of how this works is given
|
taken as a <i>back reference</i>. A description of how this works is given
|
||||||
<a href="#backreferences">later,</a>
|
<a href="#backreferences">later,</a>
|
||||||
@ -400,12 +394,11 @@ following the discussion of
|
|||||||
<a href="#subpattern">parenthesized subpatterns.</a>
|
<a href="#subpattern">parenthesized subpatterns.</a>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Inside a character class, or if the decimal number is greater than 9 and there
|
Inside a character class, or if the decimal number following \ is greater than
|
||||||
have not been that many capturing subpatterns, PCRE re-reads up to three octal
|
7 and there have not been that many capturing subpatterns, PCRE handles \8 and
|
||||||
digits following the backslash, and uses them to generate a data character. Any
|
\9 as the literal characters "8" and "9", and otherwise re-reads up to three
|
||||||
subsequent digits stand for themselves. The value of the character is
|
octal digits following the backslash, using them to generate a data character.
|
||||||
constrained in the same way as characters specified in hexadecimal.
|
Any subsequent digits stand for themselves. For example:
|
||||||
For example:
|
|
||||||
<pre>
|
<pre>
|
||||||
\040 is another way of writing an ASCII space
|
\040 is another way of writing an ASCII space
|
||||||
\40 is the same, provided there are fewer than 40 previous capturing subpatterns
|
\40 is the same, provided there are fewer than 40 previous capturing subpatterns
|
||||||
@ -415,12 +408,53 @@ For example:
|
|||||||
\0113 is a tab followed by the character "3"
|
\0113 is a tab followed by the character "3"
|
||||||
\113 might be a back reference, otherwise the character with octal code 113
|
\113 might be a back reference, otherwise the character with octal code 113
|
||||||
\377 might be a back reference, otherwise the value 255 (decimal)
|
\377 might be a back reference, otherwise the value 255 (decimal)
|
||||||
\81 is either a back reference, or a binary zero followed by the two characters "8" and "1"
|
\81 is either a back reference, or the two characters "8" and "1"
|
||||||
</pre>
|
</pre>
|
||||||
Note that octal values of 100 or greater must not be introduced by a leading
|
Note that octal values of 100 or greater that are specified using this syntax
|
||||||
zero, because no more than three octal digits are ever read.
|
must not be introduced by a leading zero, because no more than three octal
|
||||||
|
digits are ever read.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
By default, after \x that is not followed by {, from zero to two hexadecimal
|
||||||
|
digits are read (letters can be in upper or lower case). Any number of
|
||||||
|
hexadecimal digits may appear between \x{ and }. If a character other than
|
||||||
|
a hexadecimal digit appears between \x{ and }, or if there is no terminating
|
||||||
|
}, an error occurs.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \x is
|
||||||
|
as just described only when it is followed by two hexadecimal digits.
|
||||||
|
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
||||||
|
code points greater than 256 is provided by \u, which must be followed by
|
||||||
|
four hexadecimal digits; otherwise it matches a literal "u" character.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Characters whose value is less than 256 can be defined by either of the two
|
||||||
|
syntaxes for \x (or by \u in JavaScript mode). There is no difference in the
|
||||||
|
way they are handled. For example, \xdc is exactly the same as \x{dc} (or
|
||||||
|
\u00dc in JavaScript mode).
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Constraints on character values
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Characters that are specified using octal or hexadecimal numbers are
|
||||||
|
limited to certain values, as follows:
|
||||||
|
<pre>
|
||||||
|
8-bit non-UTF mode less than 0x100
|
||||||
|
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
||||||
|
16-bit non-UTF mode less than 0x10000
|
||||||
|
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
||||||
|
32-bit non-UTF mode less than 0x100000000
|
||||||
|
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
||||||
|
</pre>
|
||||||
|
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
||||||
|
"surrogate" codepoints), and 0xffef.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Escape sequences in character classes
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
All the sequences that define a single character value can be used both inside
|
All the sequences that define a single character value can be used both inside
|
||||||
and outside character classes. In addition, inside a character class, \b is
|
and outside character classes. In addition, inside a character class, \b is
|
||||||
interpreted as the backspace character (hex 08).
|
interpreted as the backspace character (hex 08).
|
||||||
@ -498,11 +532,14 @@ matching point is at the end of the subject string, all of them fail, because
|
|||||||
there is no character to match.
|
there is no character to match.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For compatibility with Perl, \s does not match the VT character (code 11).
|
For compatibility with Perl, \s did not use to match the VT character (code
|
||||||
This makes it different from the POSIX "space" class. The \s characters
|
11), which made it different from the POSIX "space" class. However, Perl
|
||||||
are HT (9), LF (10), FF (12), CR (13), and space (32). If "use locale;" is
|
added VT at release 5.18, and PCRE followed suit at release 8.34. The default
|
||||||
included in a Perl script, \s may match the VT character. In PCRE, it never
|
\s characters are now HT (9), LF (10), VT (11), FF (12), CR (13), and space
|
||||||
does.
|
(32), which are defined as white space in the "C" locale. This list may vary if
|
||||||
|
locale-specific matching is taking place. For example, in some locales the
|
||||||
|
"non-breaking space" character (\xA0) is recognized as white space, and in
|
||||||
|
others the VT character is not.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A "word" character is an underscore or any character that is a letter or digit.
|
A "word" character is an underscore or any character that is a letter or digit.
|
||||||
@ -513,21 +550,23 @@ place (see
|
|||||||
in the
|
in the
|
||||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||||
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
|
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
|
||||||
or "french" in Windows, some character codes greater than 128 are used for
|
or "french" in Windows, some character codes greater than 127 are used for
|
||||||
accented letters, and these are then matched by \w. The use of locales with
|
accented letters, and these are then matched by \w. The use of locales with
|
||||||
Unicode is discouraged.
|
Unicode is discouraged.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, in a UTF mode, characters with values greater than 128 never match
|
By default, characters whose code points are greater than 127 never match \d,
|
||||||
\d, \s, or \w, and always match \D, \S, and \W. These sequences retain
|
\s, or \w, and always match \D, \S, and \W, although this may vary for
|
||||||
their original meanings from before UTF support was available, mainly for
|
characters in the range 128-255 when locale-specific matching is happening.
|
||||||
efficiency reasons. However, if PCRE is compiled with Unicode property support,
|
These escape sequences retain their original meanings from before Unicode
|
||||||
and the PCRE_UCP option is set, the behaviour is changed so that Unicode
|
support was available, mainly for efficiency reasons. If PCRE is compiled with
|
||||||
properties are used to determine character types, as follows:
|
Unicode property support, and the PCRE_UCP option is set, the behaviour is
|
||||||
|
changed so that Unicode properties are used to determine character types, as
|
||||||
|
follows:
|
||||||
<pre>
|
<pre>
|
||||||
\d any character that \p{Nd} matches (decimal digit)
|
\d any character that matches \p{Nd} (decimal digit)
|
||||||
\s any character that \p{Z} matches, plus HT, LF, FF, CR
|
\s any character that matches \p{Z} or \h or \v
|
||||||
\w any character that \p{L} or \p{N} matches, plus underscore
|
\w any character that matches \p{L} or \p{N}, plus underscore
|
||||||
</pre>
|
</pre>
|
||||||
The upper case escapes match the inverse sets of characters. Note that \d
|
The upper case escapes match the inverse sets of characters. Note that \d
|
||||||
matches only decimal digits, whereas \w matches any Unicode digit, as well as
|
matches only decimal digits, whereas \w matches any Unicode digit, as well as
|
||||||
@ -538,7 +577,7 @@ is noticeably slower when PCRE_UCP is set.
|
|||||||
<P>
|
<P>
|
||||||
The sequences \h, \H, \v, and \V are features that were added to Perl at
|
The sequences \h, \H, \v, and \V are features that were added to Perl at
|
||||||
release 5.10. In contrast to the other sequences, which match only ASCII
|
release 5.10. In contrast to the other sequences, which match only ASCII
|
||||||
characters by default, these always match certain high-valued codepoints,
|
characters by default, these always match certain high-valued code points,
|
||||||
whether or not PCRE_UCP is set. The horizontal space characters are:
|
whether or not PCRE_UCP is set. The horizontal space characters are:
|
||||||
<pre>
|
<pre>
|
||||||
U+0009 Horizontal tab (HT)
|
U+0009 Horizontal tab (HT)
|
||||||
@ -913,9 +952,9 @@ PCRE's additional properties
|
|||||||
<P>
|
<P>
|
||||||
As well as the standard Unicode properties described above, PCRE supports four
|
As well as the standard Unicode properties described above, PCRE supports four
|
||||||
more that make it possible to convert traditional escape sequences such as \w
|
more that make it possible to convert traditional escape sequences such as \w
|
||||||
and \s and POSIX character classes to use Unicode properties. PCRE uses these
|
and \s to use Unicode properties. PCRE uses these non-standard, non-Perl
|
||||||
non-standard, non-Perl properties internally when PCRE_UCP is set. However,
|
properties internally when PCRE_UCP is set. However, they may also be used
|
||||||
they may also be used explicitly. These properties are:
|
explicitly. These properties are:
|
||||||
<pre>
|
<pre>
|
||||||
Xan Any alphanumeric character
|
Xan Any alphanumeric character
|
||||||
Xps Any POSIX space character
|
Xps Any POSIX space character
|
||||||
@ -925,8 +964,9 @@ they may also be used explicitly. These properties are:
|
|||||||
Xan matches characters that have either the L (letter) or the N (number)
|
Xan matches characters that have either the L (letter) or the N (number)
|
||||||
property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
|
property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
|
||||||
carriage return, and any other character that has the Z (separator) property.
|
carriage return, and any other character that has the Z (separator) property.
|
||||||
Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
|
Xsp is the same as Xps; it used to exclude vertical tab, for Perl
|
||||||
same characters as Xan, plus underscore.
|
compatibility, but Perl changed, and so PCRE followed at release 8.34. Xwd
|
||||||
|
matches the same characters as Xan, plus underscore.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is another non-standard property, Xuc, which matches any character that
|
There is another non-standard property, Xuc, which matches any character that
|
||||||
@ -1218,7 +1258,9 @@ The minus (hyphen) character can be used to specify a range of characters in a
|
|||||||
character class. For example, [d-m] matches any letter between d and m,
|
character class. For example, [d-m] matches any letter between d and m,
|
||||||
inclusive. If a minus character is required in a class, it must be escaped with
|
inclusive. If a minus character is required in a class, it must be escaped with
|
||||||
a backslash or appear in a position where it cannot be interpreted as
|
a backslash or appear in a position where it cannot be interpreted as
|
||||||
indicating a range, typically as the first or last character in the class.
|
indicating a range, typically as the first or last character in the class, or
|
||||||
|
immediately after a range. For example, [b-d-z] matches letters in the range b
|
||||||
|
to d, a hyphen character, or z.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
It is not possible to have the literal character "]" as the end character of a
|
It is not possible to have the literal character "]" as the end character of a
|
||||||
@ -1230,6 +1272,12 @@ followed by two other characters. The octal or hexadecimal representation of
|
|||||||
"]" can also be used to end a range.
|
"]" can also be used to end a range.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
An error is generated if a POSIX character class (see below) or an escape
|
||||||
|
sequence other than one that defines a single character appears at a point
|
||||||
|
where a range ending character is expected. For example, [z-\xff] is valid,
|
||||||
|
but [A-\d] and [A-[:digit:]] are not.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
Ranges operate in the collating sequence of character values. They can also be
|
Ranges operate in the collating sequence of character values. They can also be
|
||||||
used for characters specified numerically, for example [\000-\037]. Ranges
|
used for characters specified numerically, for example [\000-\037]. Ranges
|
||||||
can include any characters that are valid for the current mode.
|
can include any characters that are valid for the current mode.
|
||||||
@ -1269,9 +1317,9 @@ something AND NOT ...".
|
|||||||
The only metacharacters that are recognized in character classes are backslash,
|
The only metacharacters that are recognized in character classes are backslash,
|
||||||
hyphen (only where it can be interpreted as specifying a range), circumflex
|
hyphen (only where it can be interpreted as specifying a range), circumflex
|
||||||
(only at the start), opening square bracket (only when it can be interpreted as
|
(only at the start), opening square bracket (only when it can be interpreted as
|
||||||
introducing a POSIX class name - see the next section), and the terminating
|
introducing a POSIX class name, or for a special compatibility feature - see
|
||||||
closing square bracket. However, escaping other non-alphanumeric characters
|
the next two sections), and the terminating closing square bracket. However,
|
||||||
does no harm.
|
escaping other non-alphanumeric characters does no harm.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">POSIX CHARACTER CLASSES</a><br>
|
<br><a name="SEC10" href="#TOC1">POSIX CHARACTER CLASSES</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -1294,15 +1342,17 @@ are:
|
|||||||
lower lower case letters
|
lower lower case letters
|
||||||
print printing characters, including space
|
print printing characters, including space
|
||||||
punct printing characters, excluding letters and digits and space
|
punct printing characters, excluding letters and digits and space
|
||||||
space white space (not quite the same as \s)
|
space white space (the same as \s from PCRE 8.34)
|
||||||
upper upper case letters
|
upper upper case letters
|
||||||
word "word" characters (same as \w)
|
word "word" characters (same as \w)
|
||||||
xdigit hexadecimal digits
|
xdigit hexadecimal digits
|
||||||
</pre>
|
</pre>
|
||||||
The "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), and
|
The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13),
|
||||||
space (32). Notice that this list includes the VT character (code 11). This
|
and space (32). If locale-specific matching is taking place, the list of space
|
||||||
makes "space" different to \s, which does not include VT (for Perl
|
characters may be different; there may be fewer or more of them. "Space" used
|
||||||
compatibility).
|
to be different to \s, which did not include VT, for Perl compatibility.
|
||||||
|
However, Perl changed at release 5.18, and PCRE followed at release 8.34.
|
||||||
|
"Space" and \s now match the same set of characters.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
|
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
|
||||||
@ -1316,11 +1366,11 @@ syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
|
|||||||
supported, and an error is given if they are encountered.
|
supported, and an error is given if they are encountered.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, in UTF modes, characters with values greater than 128 do not match
|
By default, characters with values greater than 127 do not match any of the
|
||||||
any of the POSIX character classes. However, if the PCRE_UCP option is passed
|
POSIX character classes. However, if the PCRE_UCP option is passed to
|
||||||
to <b>pcre_compile()</b>, some of the classes are changed so that Unicode
|
<b>pcre_compile()</b>, some of the classes are changed so that Unicode character
|
||||||
character properties are used. This is achieved by replacing the POSIX classes
|
properties are used. This is achieved by replacing certain POSIX classes by
|
||||||
by other sequences, as follows:
|
other sequences, as follows:
|
||||||
<pre>
|
<pre>
|
||||||
[:alnum:] becomes \p{Xan}
|
[:alnum:] becomes \p{Xan}
|
||||||
[:alpha:] becomes \p{L}
|
[:alpha:] becomes \p{L}
|
||||||
@ -1331,11 +1381,56 @@ by other sequences, as follows:
|
|||||||
[:upper:] becomes \p{Lu}
|
[:upper:] becomes \p{Lu}
|
||||||
[:word:] becomes \p{Xwd}
|
[:word:] becomes \p{Xwd}
|
||||||
</pre>
|
</pre>
|
||||||
Negated versions, such as [:^alpha:] use \P instead of \p. The other POSIX
|
Negated versions, such as [:^alpha:], use \P instead of \p. Three other POSIX
|
||||||
classes are unchanged, and match only characters with code points less than
|
classes are handled specially in UCP mode:
|
||||||
128.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">VERTICAL BAR</a><br>
|
<P>
|
||||||
|
[:graph:]
|
||||||
|
This matches characters that have glyphs that mark the page when printed. In
|
||||||
|
Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf
|
||||||
|
properties, except for:
|
||||||
|
<pre>
|
||||||
|
U+061C Arabic Letter Mark
|
||||||
|
U+180E Mongolian Vowel Separator
|
||||||
|
U+2066 - U+2069 Various "isolate"s
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
[:print:]
|
||||||
|
This matches the same characters as [:graph:] plus space characters that are
|
||||||
|
not controls, that is, characters with the Zs property.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
[:punct:]
|
||||||
|
This matches all characters that have the Unicode P (punctuation) property,
|
||||||
|
plus those characters whose code points are less than 128 that have the S
|
||||||
|
(Symbol) property.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The other POSIX classes are unchanged, and match only characters with code
|
||||||
|
points less than 128.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC11" href="#TOC1">COMPATIBILITY FEATURE FOR WORD BOUNDARIES</a><br>
|
||||||
|
<P>
|
||||||
|
In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly
|
||||||
|
syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of
|
||||||
|
word". PCRE treats these items as follows:
|
||||||
|
<pre>
|
||||||
|
[[:<:]] is converted to \b(?=\w)
|
||||||
|
[[:>:]] is converted to \b(?<=\w)
|
||||||
|
</pre>
|
||||||
|
Only these exact character sequences are recognized. A sequence such as
|
||||||
|
[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is
|
||||||
|
not compatible with Perl. It is provided to help migrations from other
|
||||||
|
environments, and is best not used in any new patterns. Note that \b matches
|
||||||
|
at the start and the end of a word (see
|
||||||
|
<a href="#smallassertions">"Simple assertions"</a>
|
||||||
|
above), and in a Perl-style pattern the preceding or following character
|
||||||
|
normally shows which is wanted, without the need for the assertions that are
|
||||||
|
used above in order to give exactly the POSIX behaviour.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC12" href="#TOC1">VERTICAL BAR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Vertical bar characters are used to separate alternative patterns. For example,
|
Vertical bar characters are used to separate alternative patterns. For example,
|
||||||
the pattern
|
the pattern
|
||||||
@ -1350,7 +1445,7 @@ that succeeds is used. If the alternatives are within a subpattern
|
|||||||
"succeeds" means matching the rest of the main pattern as well as the
|
"succeeds" means matching the rest of the main pattern as well as the
|
||||||
alternative in the subpattern.
|
alternative in the subpattern.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">INTERNAL OPTION SETTING</a><br>
|
<br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
|
||||||
<P>
|
<P>
|
||||||
The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
|
The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
|
||||||
PCRE_EXTENDED options (which are Perl-compatible) can be changed from within
|
PCRE_EXTENDED options (which are Perl-compatible) can be changed from within
|
||||||
@ -1413,7 +1508,7 @@ options, respectively. The (*UTF) sequence is a generic version that can be
|
|||||||
used with any of the libraries. However, the application can set the
|
used with any of the libraries. However, the application can set the
|
||||||
PCRE_NEVER_UTF option, which locks out the use of the (*UTF) sequences.
|
PCRE_NEVER_UTF option, which locks out the use of the (*UTF) sequences.
|
||||||
<a name="subpattern"></a></P>
|
<a name="subpattern"></a></P>
|
||||||
<br><a name="SEC13" href="#TOC1">SUBPATTERNS</a><br>
|
<br><a name="SEC14" href="#TOC1">SUBPATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Subpatterns are delimited by parentheses (round brackets), which can be nested.
|
Subpatterns are delimited by parentheses (round brackets), which can be nested.
|
||||||
Turning part of a pattern into a subpattern does two things:
|
Turning part of a pattern into a subpattern does two things:
|
||||||
@ -1469,7 +1564,7 @@ from left to right, and options are not reset until the end of the subpattern
|
|||||||
is reached, an option setting in one branch does affect subsequent branches, so
|
is reached, an option setting in one branch does affect subsequent branches, so
|
||||||
the above patterns match "SUNDAY" as well as "Saturday".
|
the above patterns match "SUNDAY" as well as "Saturday".
|
||||||
<a name="dupsubpatternnumber"></a></P>
|
<a name="dupsubpatternnumber"></a></P>
|
||||||
<br><a name="SEC14" href="#TOC1">DUPLICATE SUBPATTERN NUMBERS</a><br>
|
<br><a name="SEC15" href="#TOC1">DUPLICATE SUBPATTERN NUMBERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
|
Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
|
||||||
the same numbers for its capturing parentheses. Such a subpattern starts with
|
the same numbers for its capturing parentheses. Such a subpattern starts with
|
||||||
@ -1513,7 +1608,7 @@ true if any of the subpatterns of that number have matched.
|
|||||||
An alternative approach to using this "branch reset" feature is to use
|
An alternative approach to using this "branch reset" feature is to use
|
||||||
duplicate named subpatterns, as described in the next section.
|
duplicate named subpatterns, as described in the next section.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
<br><a name="SEC16" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Identifying capturing parentheses by number is simple, but it can be very hard
|
Identifying capturing parentheses by number is simple, but it can be very hard
|
||||||
to keep track of the numbers in complicated regular expressions. Furthermore,
|
to keep track of the numbers in complicated regular expressions. Furthermore,
|
||||||
@ -1535,11 +1630,12 @@ and
|
|||||||
can be made by name as well as by number.
|
can be made by name as well as by number.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Names consist of up to 32 alphanumeric characters and underscores. Named
|
Names consist of up to 32 alphanumeric characters and underscores, but must
|
||||||
capturing parentheses are still allocated numbers as well as names, exactly as
|
start with a non-digit. Named capturing parentheses are still allocated numbers
|
||||||
if the names were not present. The PCRE API provides function calls for
|
as well as names, exactly as if the names were not present. The PCRE API
|
||||||
extracting the name-to-number translation table from a compiled pattern. There
|
provides function calls for extracting the name-to-number translation table
|
||||||
is also a convenience function for extracting a captured substring by name.
|
from a compiled pattern. There is also a convenience function for extracting a
|
||||||
|
captured substring by name.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, a name must be unique within a pattern, but it is possible to relax
|
By default, a name must be unique within a pattern, but it is possible to relax
|
||||||
@ -1568,9 +1664,23 @@ matched. This saves searching to find which numbered subpattern it was.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you make a back reference to a non-unique named subpattern from elsewhere in
|
If you make a back reference to a non-unique named subpattern from elsewhere in
|
||||||
the pattern, the one that corresponds to the first occurrence of the name is
|
the pattern, the subpatterns to which the name refers are checked in the order
|
||||||
used. In the absence of duplicate numbers (see the previous section) this is
|
in which they appear in the overall pattern. The first one that is set is used
|
||||||
the one with the lowest number. If you use a named reference in a condition
|
for the reference. For example, this pattern matches both "foofoo" and
|
||||||
|
"barbar" but not "foobar" or "barfoo":
|
||||||
|
<pre>
|
||||||
|
(?:(?<n>foo)|(?<n>bar))\k<n>
|
||||||
|
|
||||||
|
</PRE>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If you make a subroutine call to a non-unique named subpattern, the one that
|
||||||
|
corresponds to the first occurrence of the name is used. In the absence of
|
||||||
|
duplicate numbers (see the previous section) this is the one with the lowest
|
||||||
|
number.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If you use a named reference in a condition
|
||||||
test (see the
|
test (see the
|
||||||
<a href="#conditions">section about conditions</a>
|
<a href="#conditions">section about conditions</a>
|
||||||
below), either to check whether a subpattern has matched, or to check for
|
below), either to check whether a subpattern has matched, or to check for
|
||||||
@ -1585,10 +1695,11 @@ documentation.
|
|||||||
<b>Warning:</b> You cannot use different names to distinguish between two
|
<b>Warning:</b> You cannot use different names to distinguish between two
|
||||||
subpatterns with the same number because PCRE uses only the numbers when
|
subpatterns with the same number because PCRE uses only the numbers when
|
||||||
matching. For this reason, an error is given at compile time if different names
|
matching. For this reason, an error is given at compile time if different names
|
||||||
are given to subpatterns with the same number. However, you can give the same
|
are given to subpatterns with the same number. However, you can always give the
|
||||||
name to subpatterns with the same number, even when PCRE_DUPNAMES is not set.
|
same name to subpatterns with the same number, even when PCRE_DUPNAMES is not
|
||||||
|
set.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">REPETITION</a><br>
|
<br><a name="SEC17" href="#TOC1">REPETITION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Repetition is specified by quantifiers, which can follow any of the following
|
Repetition is specified by quantifiers, which can follow any of the following
|
||||||
items:
|
items:
|
||||||
@ -1756,7 +1867,7 @@ example, after
|
|||||||
</pre>
|
</pre>
|
||||||
matches "aba" the value of the second captured substring is "b".
|
matches "aba" the value of the second captured substring is "b".
|
||||||
<a name="atomicgroup"></a></P>
|
<a name="atomicgroup"></a></P>
|
||||||
<br><a name="SEC17" href="#TOC1">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a><br>
|
<br><a name="SEC18" href="#TOC1">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
|
With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
|
||||||
repetition, failure of what follows normally causes the repeated item to be
|
repetition, failure of what follows normally causes the repeated item to be
|
||||||
@ -1860,7 +1971,7 @@ an atomic group, like this:
|
|||||||
</pre>
|
</pre>
|
||||||
sequences of non-digits cannot be broken, and failure happens quickly.
|
sequences of non-digits cannot be broken, and failure happens quickly.
|
||||||
<a name="backreferences"></a></P>
|
<a name="backreferences"></a></P>
|
||||||
<br><a name="SEC18" href="#TOC1">BACK REFERENCES</a><br>
|
<br><a name="SEC19" href="#TOC1">BACK REFERENCES</a><br>
|
||||||
<P>
|
<P>
|
||||||
Outside a character class, a backslash followed by a digit greater than 0 (and
|
Outside a character class, a backslash followed by a digit greater than 0 (and
|
||||||
possibly further digits) is a back reference to a capturing subpattern earlier
|
possibly further digits) is a back reference to a capturing subpattern earlier
|
||||||
@ -1988,7 +2099,7 @@ as an
|
|||||||
Once the whole group has been matched, a subsequent matching failure cannot
|
Once the whole group has been matched, a subsequent matching failure cannot
|
||||||
cause backtracking into the middle of the group.
|
cause backtracking into the middle of the group.
|
||||||
<a name="bigassertions"></a></P>
|
<a name="bigassertions"></a></P>
|
||||||
<br><a name="SEC19" href="#TOC1">ASSERTIONS</a><br>
|
<br><a name="SEC20" href="#TOC1">ASSERTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
An assertion is a test on the characters following or preceding the current
|
An assertion is a test on the characters following or preceding the current
|
||||||
matching point that does not actually consume any characters. The simple
|
matching point that does not actually consume any characters. The simple
|
||||||
@ -2178,7 +2289,7 @@ preceded by "foo", while
|
|||||||
is another pattern that matches "foo" preceded by three digits and any three
|
is another pattern that matches "foo" preceded by three digits and any three
|
||||||
characters that are not "999".
|
characters that are not "999".
|
||||||
<a name="conditions"></a></P>
|
<a name="conditions"></a></P>
|
||||||
<br><a name="SEC20" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
|
<br><a name="SEC21" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
It is possible to cause the matching process to obey a subpattern
|
It is possible to cause the matching process to obey a subpattern
|
||||||
conditionally or to choose between two alternative subpatterns, depending on
|
conditionally or to choose between two alternative subpatterns, depending on
|
||||||
@ -2252,12 +2363,7 @@ Checking for a used subpattern by name
|
|||||||
<P>
|
<P>
|
||||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||||
subpattern by name. For compatibility with earlier versions of PCRE, which had
|
subpattern by name. For compatibility with earlier versions of PCRE, which had
|
||||||
this facility before Perl, the syntax (?(name)...) is also recognized. However,
|
this facility before Perl, the syntax (?(name)...) is also recognized.
|
||||||
there is a possible ambiguity with this syntax, because subpattern names may
|
|
||||||
consist entirely of digits. PCRE looks first for a named subpattern; if it
|
|
||||||
cannot find one and the name consists entirely of digits, PCRE looks for a
|
|
||||||
subpattern of that number, which must be greater than zero. Using subpattern
|
|
||||||
names that consist entirely of digits is not recommended.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Rewriting the above example to use a named subpattern gives this:
|
Rewriting the above example to use a named subpattern gives this:
|
||||||
@ -2333,7 +2439,7 @@ subject is matched against the first alternative; otherwise it is matched
|
|||||||
against the second. This pattern matches strings in one of the two forms
|
against the second. This pattern matches strings in one of the two forms
|
||||||
dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
|
dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
|
||||||
<a name="comments"></a></P>
|
<a name="comments"></a></P>
|
||||||
<br><a name="SEC21" href="#TOC1">COMMENTS</a><br>
|
<br><a name="SEC22" href="#TOC1">COMMENTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
There are two ways of including comments in patterns that are processed by
|
There are two ways of including comments in patterns that are processed by
|
||||||
PCRE. In both cases, the start of the comment must not be in a character class,
|
PCRE. In both cases, the start of the comment must not be in a character class,
|
||||||
@ -2362,7 +2468,7 @@ a newline in the pattern. The sequence \n is still literal at this stage, so
|
|||||||
it does not terminate the comment. Only an actual character with the code value
|
it does not terminate the comment. Only an actual character with the code value
|
||||||
0x0a (the default newline) does so.
|
0x0a (the default newline) does so.
|
||||||
<a name="recursion"></a></P>
|
<a name="recursion"></a></P>
|
||||||
<br><a name="SEC22" href="#TOC1">RECURSIVE PATTERNS</a><br>
|
<br><a name="SEC23" href="#TOC1">RECURSIVE PATTERNS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Consider the problem of matching a string in parentheses, allowing for
|
Consider the problem of matching a string in parentheses, allowing for
|
||||||
unlimited nested parentheses. Without the use of recursion, the best that can
|
unlimited nested parentheses. Without the use of recursion, the best that can
|
||||||
@ -2577,7 +2683,7 @@ now match "b" and so the whole match succeeds. In Perl, the pattern fails to
|
|||||||
match because inside the recursive call \1 cannot access the externally set
|
match because inside the recursive call \1 cannot access the externally set
|
||||||
value.
|
value.
|
||||||
<a name="subpatternsassubroutines"></a></P>
|
<a name="subpatternsassubroutines"></a></P>
|
||||||
<br><a name="SEC23" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
|
<br><a name="SEC24" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
|
||||||
<P>
|
<P>
|
||||||
If the syntax for a recursive subpattern call (either by number or by
|
If the syntax for a recursive subpattern call (either by number or by
|
||||||
name) is used outside the parentheses to which it refers, it operates like a
|
name) is used outside the parentheses to which it refers, it operates like a
|
||||||
@ -2618,7 +2724,7 @@ different calls. For example, consider this pattern:
|
|||||||
It matches "abcabc". It does not match "abcABC" because the change of
|
It matches "abcabc". It does not match "abcABC" because the change of
|
||||||
processing option does not affect the called subpattern.
|
processing option does not affect the called subpattern.
|
||||||
<a name="onigurumasubroutines"></a></P>
|
<a name="onigurumasubroutines"></a></P>
|
||||||
<br><a name="SEC24" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
|
<br><a name="SEC25" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
|
||||||
<P>
|
<P>
|
||||||
For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or
|
For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or
|
||||||
a number enclosed either in angle brackets or single quotes, is an alternative
|
a number enclosed either in angle brackets or single quotes, is an alternative
|
||||||
@ -2636,7 +2742,7 @@ plus or a minus sign it is taken as a relative reference. For example:
|
|||||||
Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are <i>not</i>
|
Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are <i>not</i>
|
||||||
synonymous. The former is a back reference; the latter is a subroutine call.
|
synonymous. The former is a back reference; the latter is a subroutine call.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC25" href="#TOC1">CALLOUTS</a><br>
|
<br><a name="SEC26" href="#TOC1">CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
|
Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
|
||||||
code to be obeyed in the middle of matching a regular expression. This makes it
|
code to be obeyed in the middle of matching a regular expression. This makes it
|
||||||
@ -2674,12 +2780,18 @@ During matching, when PCRE reaches a callout point, the external function is
|
|||||||
called. It is provided with the number of the callout, the position in the
|
called. It is provided with the number of the callout, the position in the
|
||||||
pattern, and, optionally, one item of data originally supplied by the caller of
|
pattern, and, optionally, one item of data originally supplied by the caller of
|
||||||
the matching function. The callout function may cause matching to proceed, to
|
the matching function. The callout function may cause matching to proceed, to
|
||||||
backtrack, or to fail altogether. A complete description of the interface to
|
backtrack, or to fail altogether.
|
||||||
the callout function is given in the
|
</P>
|
||||||
|
<P>
|
||||||
|
By default, PCRE implements a number of optimizations at compile time and
|
||||||
|
matching time, and one side-effect is that sometimes callouts are skipped. If
|
||||||
|
you need all possible callouts to happen, you need to set options that disable
|
||||||
|
the relevant optimizations. More details, and a complete description of the
|
||||||
|
interface to the callout function, are given in the
|
||||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
<a name="backtrackcontrol"></a></P>
|
<a name="backtrackcontrol"></a></P>
|
||||||
<br><a name="SEC26" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||||
<P>
|
<P>
|
||||||
Perl 5.10 introduced a number of "Special Backtracking Control Verbs", which
|
Perl 5.10 introduced a number of "Special Backtracking Control Verbs", which
|
||||||
are still described in the Perl documentation as "experimental and subject to
|
are still described in the Perl documentation as "experimental and subject to
|
||||||
@ -3026,7 +3138,7 @@ example:
|
|||||||
<pre>
|
<pre>
|
||||||
...(*COMMIT)(*PRUNE)...
|
...(*COMMIT)(*PRUNE)...
|
||||||
</pre>
|
</pre>
|
||||||
If there is a matching failure to the right, backtracking onto (*PRUNE) cases
|
If there is a matching failure to the right, backtracking onto (*PRUNE) causes
|
||||||
it to be triggered, and its action is taken. There can never be a backtrack
|
it to be triggered, and its action is taken. There can never be a backtrack
|
||||||
onto (*COMMIT).
|
onto (*COMMIT).
|
||||||
<a name="btrepeat"></a></P>
|
<a name="btrepeat"></a></P>
|
||||||
@ -3093,12 +3205,12 @@ the subroutine match to fail.
|
|||||||
the subpattern that has alternatives. If there is no such group within the
|
the subpattern that has alternatives. If there is no such group within the
|
||||||
subpattern, (*THEN) causes the subroutine match to fail.
|
subpattern, (*THEN) causes the subroutine match to fail.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC28" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrematching</b>(3),
|
<b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrematching</b>(3),
|
||||||
<b>pcresyntax</b>(3), <b>pcre</b>(3), <b>pcre16(3)</b>, <b>pcre32(3)</b>.
|
<b>pcresyntax</b>(3), <b>pcre</b>(3), <b>pcre16(3)</b>, <b>pcre32(3)</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC28" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC29" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
@ -3107,9 +3219,9 @@ University Computing Service
|
|||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC29" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 26 April 2013
|
Last updated: 03 December 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2013 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -13,7 +13,7 @@ from the original man page. If there is any nonsense in it, please consult the
|
|||||||
man page, in case the conversion went wrong.
|
man page, in case the conversion went wrong.
|
||||||
<br>
|
<br>
|
||||||
<ul>
|
<ul>
|
||||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF POSIX API</a>
|
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||||
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
|
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
|
||||||
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
|
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
|
||||||
@ -23,23 +23,21 @@ man page, in case the conversion went wrong.
|
|||||||
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
|
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
|
||||||
<li><a name="TOC9" href="#SEC9">REVISION</a>
|
<li><a name="TOC9" href="#SEC9">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF POSIX API</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>#include <pcreposix.h></b>
|
<b>#include <pcreposix.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
|
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
|
||||||
<b>int <i>cflags</i>);</b>
|
<b> int <i>cflags</i>);</b>
|
||||||
</P>
|
<br>
|
||||||
<P>
|
<br>
|
||||||
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
|
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
|
||||||
<b>size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
<b> size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
||||||
</P>
|
<b> size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
||||||
<P>
|
<b> char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
||||||
<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
<br>
|
||||||
<b>char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
<br>
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
<b>void regfree(regex_t *<i>preg</i>);</b>
|
<b>void regfree(regex_t *<i>preg</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
|
@ -102,8 +102,8 @@ study data.
|
|||||||
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||||
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary,
|
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary, you
|
||||||
you pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
||||||
the usual way.
|
the usual way.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -119,6 +119,11 @@ in the
|
|||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>Warning:</b> The tables that <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> use
|
||||||
|
must be the same as those that were used when the pattern was compiled. If this
|
||||||
|
is not the case, the behaviour is undefined.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
If you did not provide custom character tables when the pattern was compiled,
|
If you did not provide custom character tables when the pattern was compiled,
|
||||||
the pointer in the compiled pattern is NULL, which causes the matching
|
the pointer in the compiled pattern is NULL, which causes the matching
|
||||||
functions to use PCRE's internal tables. Thus, you do not need to take any
|
functions to use PCRE's internal tables. Thus, you do not need to take any
|
||||||
@ -126,9 +131,9 @@ special action at run time in this case.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you saved study data with the compiled pattern, you need to create your own
|
If you saved study data with the compiled pattern, you need to create your own
|
||||||
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point to the
|
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point
|
||||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
to the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in
|
||||||
<i>flags</i> field to indicate that study data is present. Then pass the
|
the <i>flags</i> field to indicate that study data is present. Then pass the
|
||||||
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
|
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
|
||||||
pattern was studied for just-in-time optimization, that data cannot be saved,
|
pattern was studied for just-in-time optimization, that data cannot be saved,
|
||||||
and so is lost by a save/restore cycle.
|
and so is lost by a save/restore cycle.
|
||||||
@ -149,9 +154,9 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 24 June 2012
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2012 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -65,10 +65,14 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||||||
\n newline (hex 0A)
|
\n newline (hex 0A)
|
||||||
\r carriage return (hex 0D)
|
\r carriage return (hex 0D)
|
||||||
\t tab (hex 09)
|
\t tab (hex 09)
|
||||||
|
\0dd character with octal code 0dd
|
||||||
\ddd character with octal code ddd, or backreference
|
\ddd character with octal code ddd, or backreference
|
||||||
|
\o{ddd..} character with octal code ddd..
|
||||||
\xhh character with hex code hh
|
\xhh character with hex code hh
|
||||||
\x{hhh..} character with hex code hhh..
|
\x{hhh..} character with hex code hhh..
|
||||||
</PRE>
|
</pre>
|
||||||
|
Note that \0dd is always an octal code, and that \8 and \9 are the literal
|
||||||
|
characters "8" and "9".
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
|
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -92,9 +96,11 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||||||
\W a "non-word" character
|
\W a "non-word" character
|
||||||
\X a Unicode extended grapheme cluster
|
\X a Unicode extended grapheme cluster
|
||||||
</pre>
|
</pre>
|
||||||
In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
|
By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
|
||||||
characters, even in a UTF mode. However, this can be changed by setting the
|
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
||||||
PCRE_UCP option.
|
happening, \s and \w may also match characters with code points in the range
|
||||||
|
128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences
|
||||||
|
is changed to use Unicode properties and they match many more characters.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -150,11 +156,13 @@ PCRE_UCP option.
|
|||||||
<pre>
|
<pre>
|
||||||
Xan Alphanumeric: union of properties L and N
|
Xan Alphanumeric: union of properties L and N
|
||||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||||
Xsp Perl space: property Z or tab, NL, FF, CR
|
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||||
Xuc Universally-named character: one that can be
|
Xuc Universally-named character: one that can be
|
||||||
represented by a Universal Character Name
|
represented by a Universal Character Name
|
||||||
Xwd Perl word: property Xan or underscore
|
Xwd Perl word: property Xan or underscore
|
||||||
</PRE>
|
</pre>
|
||||||
|
Perl and POSIX space are now the same. Perl added VT to its space character set
|
||||||
|
at release 5.18 and PCRE changed at release 8.34.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -385,7 +393,9 @@ newline-setting options with similar syntax:
|
|||||||
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
||||||
(*UTF) set appropriate UTF mode for the library in use
|
(*UTF) set appropriate UTF mode for the library in use
|
||||||
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
|
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
|
||||||
</PRE>
|
</pre>
|
||||||
|
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||||
|
limits set by the caller of pcre_exec(), not increase them.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
<br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -516,7 +526,7 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 26 April 2013
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2013 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -187,6 +187,11 @@ equivalent to adding <b>/M</b> to each regular expression. The size is given in
|
|||||||
bytes for both libraries.
|
bytes for both libraries.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-O</b>
|
||||||
|
Behave as if each pattern has the <b>/O</b> modifier, that is, disable
|
||||||
|
auto-possessification for all patterns.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-o</b> <i>osize</i>
|
<b>-o</b> <i>osize</i>
|
||||||
Set the number of elements in the output vector that is used when calling
|
Set the number of elements in the output vector that is used when calling
|
||||||
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> to be <i>osize</i>. The
|
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> to be <i>osize</i>. The
|
||||||
@ -256,19 +261,24 @@ should never be studied (see the <b>/S</b> pattern modifier below).
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-t</b>
|
<b>-t</b>
|
||||||
Run each compile, study, and match many times with a timer, and output
|
Run each compile, study, and match many times with a timer, and output the
|
||||||
resulting time per compile or match (in milliseconds). Do not set <b>-m</b> with
|
resulting times per compile, study, or match (in milliseconds). Do not set
|
||||||
<b>-t</b>, because you will then get the size output a zillion times, and the
|
<b>-m</b> with <b>-t</b>, because you will then get the size output a zillion
|
||||||
timing will be distorted. You can control the number of iterations that are
|
times, and the timing will be distorted. You can control the number of
|
||||||
used for timing by following <b>-t</b> with a number (as a separate item on the
|
iterations that are used for timing by following <b>-t</b> with a number (as a
|
||||||
command line). For example, "-t 1000" would iterate 1000 times. The default is
|
separate item on the command line). For example, "-t 1000" iterates 1000 times.
|
||||||
to iterate 500000 times.
|
The default is to iterate 500000 times.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-tm</b>
|
<b>-tm</b>
|
||||||
This is like <b>-t</b> except that it times only the matching phase, not the
|
This is like <b>-t</b> except that it times only the matching phase, not the
|
||||||
compile or study phases.
|
compile or study phases.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>-T</b> <b>-TM</b>
|
||||||
|
These behave like <b>-t</b> and <b>-tm</b>, but in addition, at the end of a run,
|
||||||
|
the total times for all compiles, studies, and matches are output.
|
||||||
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
If <b>pcretest</b> is given two filename arguments, it reads from the first and
|
If <b>pcretest</b> is given two filename arguments, it reads from the first and
|
||||||
@ -287,7 +297,7 @@ option states whether or not <b>readline()</b> will be used.
|
|||||||
<P>
|
<P>
|
||||||
The program handles any number of sets of input on a single input file. Each
|
The program handles any number of sets of input on a single input file. Each
|
||||||
set starts with a regular expression, and continues with any number of data
|
set starts with a regular expression, and continues with any number of data
|
||||||
lines to be matched against the pattern.
|
lines to be matched against that pattern.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Each data line is matched separately and independently. If you want to do
|
Each data line is matched separately and independently. If you want to do
|
||||||
@ -361,6 +371,7 @@ sections.
|
|||||||
<b>/M</b> show compiled memory size
|
<b>/M</b> show compiled memory size
|
||||||
<b>/m</b> set PCRE_MULTILINE
|
<b>/m</b> set PCRE_MULTILINE
|
||||||
<b>/N</b> set PCRE_NO_AUTO_CAPTURE
|
<b>/N</b> set PCRE_NO_AUTO_CAPTURE
|
||||||
|
<b>/O</b> set PCRE_NO_AUTO_POSSESS
|
||||||
<b>/P</b> use the POSIX wrapper
|
<b>/P</b> use the POSIX wrapper
|
||||||
<b>/S</b> study the pattern after compilation
|
<b>/S</b> study the pattern after compilation
|
||||||
<b>/s</b> set PCRE_DOTALL
|
<b>/s</b> set PCRE_DOTALL
|
||||||
@ -419,6 +430,7 @@ options that do not correspond to anything in Perl:
|
|||||||
<b>/f</b> PCRE_FIRSTLINE
|
<b>/f</b> PCRE_FIRSTLINE
|
||||||
<b>/J</b> PCRE_DUPNAMES
|
<b>/J</b> PCRE_DUPNAMES
|
||||||
<b>/N</b> PCRE_NO_AUTO_CAPTURE
|
<b>/N</b> PCRE_NO_AUTO_CAPTURE
|
||||||
|
<b>/O</b> PCRE_NO_AUTO_POSSESS
|
||||||
<b>/U</b> PCRE_UNGREEDY
|
<b>/U</b> PCRE_UNGREEDY
|
||||||
<b>/W</b> PCRE_UCP
|
<b>/W</b> PCRE_UCP
|
||||||
<b>/X</b> PCRE_EXTRA
|
<b>/X</b> PCRE_EXTRA
|
||||||
@ -562,8 +574,8 @@ matched. There are a number of qualifying characters that may follow <b>/S</b>.
|
|||||||
They may appear in any order.
|
They may appear in any order.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If <b>S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is called
|
If <b>/S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is
|
||||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
called with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||||
<b>pcre_extra</b> block, even when studying discovers no useful information.
|
<b>pcre_extra</b> block, even when studying discovers no useful information.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
@ -642,6 +654,37 @@ function:
|
|||||||
The <b>/+</b> modifier works as described above. All other modifiers are
|
The <b>/+</b> modifier works as described above. All other modifiers are
|
||||||
ignored.
|
ignored.
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Locking out certain modifiers
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
PCRE can be compiled with or without support for certain features such as
|
||||||
|
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
|
||||||
|
into a number of different files that are selected for running depending on
|
||||||
|
which features are available. When updating the tests, it is all too easy to
|
||||||
|
put a new test into the wrong file by mistake; for example, to put a test that
|
||||||
|
requires UTF support into a file that is used when it is not available. To help
|
||||||
|
detect such mistakes as early as possible, there is a facility for locking out
|
||||||
|
specific modifiers. If an input line for <b>pcretest</b> starts with the string
|
||||||
|
"< forbid " the following sequence of characters is taken as a list of
|
||||||
|
forbidden modifiers. For example, in the test files that must not use UTF or
|
||||||
|
Unicode property support, this line appears:
|
||||||
|
<pre>
|
||||||
|
< forbid 8W
|
||||||
|
</pre>
|
||||||
|
This locks out the /8 and /W modifiers. An immediate error is given if they are
|
||||||
|
subsequently encountered. If the character string contains < but not >, all the
|
||||||
|
multi-character modifiers that begin with < are locked out. Otherwise, such
|
||||||
|
modifiers must be explicitly listed, for example:
|
||||||
|
<pre>
|
||||||
|
< forbid <JS><cr>
|
||||||
|
</pre>
|
||||||
|
There must be a single space between < and "forbid" for this feature to be
|
||||||
|
recognised. If there is not, the line is interpreted either as a request to
|
||||||
|
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
|
||||||
|
below) or, if there is another < character, as a pattern that uses < as its
|
||||||
|
delimiter.
|
||||||
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">DATA LINES</a><br>
|
<br><a name="SEC7" href="#TOC1">DATA LINES</a><br>
|
||||||
<P>
|
<P>
|
||||||
Before each data line is passed to <b>pcre[16|32]_exec()</b>, leading and trailing
|
Before each data line is passed to <b>pcre[16|32]_exec()</b>, leading and trailing
|
||||||
@ -662,6 +705,7 @@ recognized:
|
|||||||
\v vertical tab (\x0b)
|
\v vertical tab (\x0b)
|
||||||
\nnn octal character (up to 3 octal digits); always
|
\nnn octal character (up to 3 octal digits); always
|
||||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||||
|
\o{dd...} octal character (any number of octal digits)
|
||||||
\xhh hexadecimal byte (up to 2 hex digits)
|
\xhh hexadecimal byte (up to 2 hex digits)
|
||||||
\x{hh...} hexadecimal character (any number of hex digits)
|
\x{hh...} hexadecimal character (any number of hex digits)
|
||||||
\A pass the PCRE_ANCHORED option to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b>
|
\A pass the PCRE_ANCHORED option to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b>
|
||||||
@ -1031,10 +1075,9 @@ writing the file, <b>pcretest</b> expects to read a new pattern.
|
|||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A saved pattern can be reloaded into <b>pcretest</b> by specifying < and a file
|
A saved pattern can be reloaded into <b>pcretest</b> by specifying < and a file
|
||||||
name instead of a pattern. The name of the file must not contain a < character,
|
name instead of a pattern. There must be no space between < and the file name,
|
||||||
as otherwise <b>pcretest</b> will interpret the line as a pattern delimited by <
|
which must not contain a < character, as otherwise <b>pcretest</b> will
|
||||||
characters.
|
interpret the line as a pattern delimited by < characters. For example:
|
||||||
For example:
|
|
||||||
<pre>
|
<pre>
|
||||||
re> </some/file
|
re> </some/file
|
||||||
Compiled pattern loaded from /some/file
|
Compiled pattern loaded from /some/file
|
||||||
@ -1091,7 +1134,7 @@ Cambridge CB2 3QH, England.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC17" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 26 April 2013
|
Last updated: 12 November 2013
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2013 University of Cambridge.
|
Copyright © 1997-2013 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
@ -4,11 +4,11 @@ pcre-config - program to return PCRE configuration
|
|||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||||
.ti +5n
|
.B " [--libs16] [--libs32] [--libs-cpp] [--libs-posix]"
|
||||||
.B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
.B " [--cflags] [--cflags-posix]"
|
||||||
.ti +5n
|
.fi
|
||||||
.B [--cflags] [--cflags-posix]
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
@ -8,8 +8,8 @@ NAME
|
|||||||
SYNOPSIS
|
SYNOPSIS
|
||||||
|
|
||||||
pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||||
[--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
[--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
||||||
[--cflags] [--cflags-posix]
|
[--cflags] [--cflags-posix]
|
||||||
|
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE 3 "13 May 2013" "PCRE 8.33"
|
.TH PCRE 3 "01 Oct 2013" "PCRE 8.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH INTRODUCTION
|
.SH INTRODUCTION
|
||||||
@ -44,7 +44,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
|||||||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||||
release 6.2.0.
|
release 6.3.0.
|
||||||
.P
|
.P
|
||||||
In addition to the Perl-compatible matching function, PCRE contains an
|
In addition to the Perl-compatible matching function, PCRE contains an
|
||||||
alternative function that matches the same compiled patterns in a different
|
alternative function that matches the same compiled patterns in a different
|
||||||
|
2291
pcre/doc/pcre.txt
2291
pcre/doc/pcre.txt
File diff suppressed because it is too large
Load Diff
@ -8,140 +8,120 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
|
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.SM
|
.nf
|
||||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
|
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
|
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
|
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B const unsigned char *pcre16_maketables(void);
|
.B const unsigned char *pcre16_maketables(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
|
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.PP
|
.sp
|
||||||
.B const char *pcre16_version(void);
|
.B const char *pcre16_version(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
|
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B void *(*pcre16_malloc)(size_t);
|
.B void *(*pcre16_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre16_free)(void *);
|
.B void (*pcre16_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B void *(*pcre16_stack_malloc)(size_t);
|
.B void *(*pcre16_stack_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre16_stack_free)(void *);
|
.B void (*pcre16_stack_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B int (*pcre16_callout)(pcre16_callout_block *);
|
.B int (*pcre16_callout)(pcre16_callout_block *);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
|
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "THE PCRE 16-BIT LIBRARY"
|
.SH "THE PCRE 16-BIT LIBRARY"
|
||||||
|
@ -8,140 +8,119 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
|
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.SM
|
.nf
|
||||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
|
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
|
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
|
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B const unsigned char *pcre32_maketables(void);
|
.B const unsigned char *pcre32_maketables(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
|
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.PP
|
.sp
|
||||||
.B const char *pcre32_version(void);
|
.B const char *pcre32_version(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
|
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B void *(*pcre32_malloc)(size_t);
|
.B void *(*pcre32_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre32_free)(void *);
|
.B void (*pcre32_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B void *(*pcre32_stack_malloc)(size_t);
|
.B void *(*pcre32_stack_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre32_stack_free)(void *);
|
.B void (*pcre32_stack_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B int (*pcre32_callout)(pcre32_callout_block *);
|
.B int (*pcre32_callout)(pcre32_callout_block *);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
|
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "THE PCRE 32-BIT LIBRARY"
|
.SH "THE PCRE 32-BIT LIBRARY"
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_COMPILE 3 "01 October 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -56,6 +51,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -64,6 +60,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_COMPILE2 3 "01 October 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
@ -64,6 +56,7 @@ The option bits are:
|
|||||||
PCRE_FIRSTLINE Force matching to be before newline
|
PCRE_FIRSTLINE Force matching to be before newline
|
||||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||||
PCRE_MULTILINE ^ and $ match newlines within data
|
PCRE_MULTILINE ^ and $ match newlines within data
|
||||||
|
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||||
sequences
|
sequences
|
||||||
@ -72,6 +65,8 @@ The option bits are:
|
|||||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||||
theses (named ones available)
|
theses (named ones available)
|
||||||
|
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||||
|
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||||
validity (only relevant if
|
validity (only relevant if
|
||||||
PCRE_UTF16 is set)
|
PCRE_UTF16 is set)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRE_CONFIG 3 "05 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -33,6 +33,7 @@ point to an unsigned long integer. The available codes are:
|
|||||||
target architecture for the JIT compiler,
|
target architecture for the JIT compiler,
|
||||||
or NULL if there is no JIT support
|
or NULL if there is no JIT support
|
||||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||||
|
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||||
Internal recursion depth limit
|
Internal recursion depth limit
|
||||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " const char **\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP);"
|
||||||
.B const char *\fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " const char **\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B pcre_jit_stack *\fIjstack\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " pcre16_jit_stack *\fIjstack\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B pcre_jit_stack *\fIjstack\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " pcre32_jit_stack *\fIjstack\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B pcre_jit_stack *\fIjstack\fP);
|
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP,
|
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP,
|
||||||
.ti +5n
|
.B " int \fImaxsize\fP);"
|
||||||
.B int \fImaxsize\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
|
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
|
||||||
.ti +5n
|
.B " int \fImaxsize\fP);"
|
||||||
.B int \fImaxsize\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
|
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
|
||||||
.ti +5n
|
.B " int \fImaxsize\fP);"
|
||||||
.B int \fImaxsize\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.sp
|
.sp
|
||||||
.B #include <pcre.h>
|
.B #include <pcre.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||||
.ti +5n
|
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
.B " int \fIkeep_boms\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIkeep_boms\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCREAPI 3 "12 May 2013" "PCRE 8.33"
|
.TH PCREAPI 3 "12 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.sp
|
.sp
|
||||||
@ -8,138 +8,115 @@ PCRE - Perl-compatible regular expressions
|
|||||||
.SH "PCRE NATIVE API BASIC FUNCTIONS"
|
.SH "PCRE NATIVE API BASIC FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.SM
|
.nf
|
||||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP);"
|
||||||
.B const char **\fIerrptr\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre_free_study(pcre_extra *\fIextra\fP);
|
.B void pcre_free_study(pcre_extra *\fIextra\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE NATIVE API STRING EXTRACTION FUNCTIONS"
|
.SH "PCRE NATIVE API STRING EXTRACTION FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " const char **\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP);"
|
||||||
.B const char *\fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " const char **\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
.sp
|
||||||
.PP
|
|
||||||
.B void pcre_free_substring(const char *\fIstringptr\fP);
|
.B void pcre_free_substring(const char *\fIstringptr\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
|
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE NATIVE API AUXILIARY FUNCTIONS"
|
.SH "PCRE NATIVE API AUXILIARY FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B pcre_jit_stack *\fIjstack\fP);
|
|
||||||
.PP
|
|
||||||
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
|
.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
|
||||||
.PP
|
.sp
|
||||||
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
||||||
.ti +5n
|
.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||||
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B const unsigned char *pcre_maketables(void);
|
.B const unsigned char *pcre_maketables(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
|
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.PP
|
.sp
|
||||||
.B const char *pcre_version(void);
|
.B const char *pcre_version(void);
|
||||||
.PP
|
.sp
|
||||||
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||||
.B pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE NATIVE API INDIRECTED FUNCTIONS"
|
.SH "PCRE NATIVE API INDIRECTED FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B void *(*pcre_malloc)(size_t);
|
.B void *(*pcre_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre_free)(void *);
|
.B void (*pcre_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B void *(*pcre_stack_malloc)(size_t);
|
.B void *(*pcre_stack_malloc)(size_t);
|
||||||
.PP
|
.sp
|
||||||
.B void (*pcre_stack_free)(void *);
|
.B void (*pcre_stack_free)(void *);
|
||||||
.PP
|
.sp
|
||||||
.B int (*pcre_callout)(pcre_callout_block *);
|
.B int (*pcre_callout)(pcre_callout_block *);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
|
.SH "PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
|
||||||
@ -482,6 +459,13 @@ the
|
|||||||
\fBpcreposix\fP
|
\fBpcreposix\fP
|
||||||
.\"
|
.\"
|
||||||
documentation.
|
documentation.
|
||||||
|
.sp
|
||||||
|
PCRE_CONFIG_PARENS_LIMIT
|
||||||
|
.sp
|
||||||
|
The output is a long integer that gives the maximum depth of nesting of
|
||||||
|
parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
|
||||||
|
of system stack used when a pattern is compiled. It is specified when PCRE is
|
||||||
|
built; the default is 250.
|
||||||
.sp
|
.sp
|
||||||
PCRE_CONFIG_MATCH_LIMIT
|
PCRE_CONFIG_MATCH_LIMIT
|
||||||
.sp
|
.sp
|
||||||
@ -509,19 +493,16 @@ avoiding the use of the stack.
|
|||||||
.SH "COMPILING A PATTERN"
|
.SH "COMPILING A PATTERN"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.sp
|
.sp
|
||||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||||
.ti +5n
|
.B " int *\fIerrorcodeptr\fP,"
|
||||||
.B int *\fIerrorcodeptr\fP,
|
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||||
.ti +5n
|
.B " const unsigned char *\fItableptr\fP);"
|
||||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B const unsigned char *\fItableptr\fP);
|
|
||||||
.P
|
.P
|
||||||
Either of the functions \fBpcre_compile()\fP or \fBpcre_compile2()\fP can be
|
Either of the functions \fBpcre_compile()\fP or \fBpcre_compile2()\fP can be
|
||||||
called to compile a pattern into an internal form. The only difference between
|
called to compile a pattern into an internal form. The only difference between
|
||||||
@ -581,8 +562,9 @@ If the final argument, \fItableptr\fP, is NULL, PCRE uses a default set of
|
|||||||
character tables that are built when PCRE is compiled, using the default C
|
character tables that are built when PCRE is compiled, using the default C
|
||||||
locale. Otherwise, \fItableptr\fP must be an address that is the result of a
|
locale. Otherwise, \fItableptr\fP must be an address that is the result of a
|
||||||
call to \fBpcre_maketables()\fP. This value is stored with the compiled
|
call to \fBpcre_maketables()\fP. This value is stored with the compiled
|
||||||
pattern, and used again by \fBpcre_exec()\fP, unless another table pointer is
|
pattern, and used again by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP when the
|
||||||
passed to it. For more discussion, see the section on locale support below.
|
pattern is matched. For more discussion, see the section on locale support
|
||||||
|
below.
|
||||||
.P
|
.P
|
||||||
This code fragment shows a typical straightforward call to \fBpcre_compile()\fP:
|
This code fragment shows a typical straightforward call to \fBpcre_compile()\fP:
|
||||||
.sp
|
.sp
|
||||||
@ -670,12 +652,22 @@ documentation.
|
|||||||
.sp
|
.sp
|
||||||
PCRE_EXTENDED
|
PCRE_EXTENDED
|
||||||
.sp
|
.sp
|
||||||
If this bit is set, white space data characters in the pattern are totally
|
If this bit is set, most white space characters in the pattern are totally
|
||||||
ignored except when escaped or inside a character class. White space does not
|
ignored except when escaped or inside a character class. However, white space
|
||||||
include the VT character (code 11). In addition, characters between an
|
is not allowed within sequences such as (?> that introduce various
|
||||||
unescaped # outside a character class and the next newline, inclusive, are also
|
parenthesized subpatterns, nor within a numerical quantifier such as {1,3}.
|
||||||
ignored. This is equivalent to Perl's /x option, and it can be changed within a
|
However, ignorable white space is permitted between an item and a following
|
||||||
pattern by a (?x) option setting.
|
quantifier and between a quantifier and a following + that indicates
|
||||||
|
possessiveness.
|
||||||
|
.P
|
||||||
|
White space formerly did not include the VT character (code 11), because Perl
|
||||||
|
did not treat this character as white space. However, Perl changed at release
|
||||||
|
5.18, so PCRE followed at release 8.34, and VT is now treated as white space.
|
||||||
|
.P
|
||||||
|
PCRE_EXTENDED also causes characters between an unescaped # outside a character
|
||||||
|
class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is
|
||||||
|
equivalent to Perl's /x option, and it can be changed within a pattern by a
|
||||||
|
(?x) option setting.
|
||||||
.P
|
.P
|
||||||
Which characters are interpreted as newlines is controlled by the options
|
Which characters are interpreted as newlines is controlled by the options
|
||||||
passed to \fBpcre_compile()\fP or by a special sequence at the start of the
|
passed to \fBpcre_compile()\fP or by a special sequence at the start of the
|
||||||
@ -820,6 +812,15 @@ the pattern. Any opening parenthesis that is not followed by ? behaves as if it
|
|||||||
were followed by ?: but named parentheses can still be used for capturing (and
|
were followed by ?: but named parentheses can still be used for capturing (and
|
||||||
they acquire numbers in the usual way). There is no equivalent of this option
|
they acquire numbers in the usual way). There is no equivalent of this option
|
||||||
in Perl.
|
in Perl.
|
||||||
|
.sp
|
||||||
|
PCRE_NO_AUTO_POSSESS
|
||||||
|
.sp
|
||||||
|
If this option is set, it disables "auto-possessification". This is an
|
||||||
|
optimization that, for example, turns a+b into a++b in order to avoid
|
||||||
|
backtracks into a+ that can never be successful. However, if callouts are in
|
||||||
|
use, auto-possessification means that some of them are never taken. You can set
|
||||||
|
this option if you want the matching functions to do a full unoptimized search
|
||||||
|
and run all the callouts, but it is mainly provided for testing purposes.
|
||||||
.sp
|
.sp
|
||||||
PCRE_NO_START_OPTIMIZE
|
PCRE_NO_START_OPTIMIZE
|
||||||
.sp
|
.sp
|
||||||
@ -886,10 +887,10 @@ page. If an invalid UTF-8 sequence is found, \fBpcre_compile()\fP returns an
|
|||||||
error. If you already know that your pattern is valid, and you want to skip
|
error. If you already know that your pattern is valid, and you want to skip
|
||||||
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
||||||
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
||||||
undefined. It may cause your program to crash. Note that this option can also
|
undefined. It may cause your program to crash or loop. Note that this option
|
||||||
be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress the
|
can also be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress
|
||||||
validity checking of subject strings only. If the same string is being matched
|
the validity checking of subject strings only. If the same string is being
|
||||||
many times, the option can be safely set for the second and subsequent
|
matched many times, the option can be safely set for the second and subsequent
|
||||||
matchings to improve performance.
|
matchings to improve performance.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
@ -936,7 +937,7 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
|||||||
31 POSIX collating elements are not supported
|
31 POSIX collating elements are not supported
|
||||||
32 this version of PCRE is compiled without UTF support
|
32 this version of PCRE is compiled without UTF support
|
||||||
33 [this code is not in use]
|
33 [this code is not in use]
|
||||||
34 character value in \ex{...} sequence is too large
|
34 character value in \ex{} or \eo{} is too large
|
||||||
35 invalid condition (?(0)
|
35 invalid condition (?(0)
|
||||||
36 \eC not allowed in lookbehind assertion
|
36 \eC not allowed in lookbehind assertion
|
||||||
37 PCRE does not support \eL, \el, \eN{name}, \eU, or \eu
|
37 PCRE does not support \eL, \el, \eN{name}, \eU, or \eu
|
||||||
@ -984,6 +985,12 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
|||||||
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||||
76 character value in \eu.... sequence is too large
|
76 character value in \eu.... sequence is too large
|
||||||
77 invalid UTF-32 string (specifically UTF-32)
|
77 invalid UTF-32 string (specifically UTF-32)
|
||||||
|
78 setting UTF is disabled by the application
|
||||||
|
79 non-hex character in \ex{} (closing brace missing?)
|
||||||
|
80 non-octal character in \eo{} (closing brace missing?)
|
||||||
|
81 missing opening brace after \eo
|
||||||
|
82 parentheses are too deeply nested
|
||||||
|
83 invalid range in character class
|
||||||
.sp
|
.sp
|
||||||
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
||||||
be used if the limits were changed when PCRE was built.
|
be used if the limits were changed when PCRE was built.
|
||||||
@ -993,9 +1000,10 @@ be used if the limits were changed when PCRE was built.
|
|||||||
.SH "STUDYING A PATTERN"
|
.SH "STUDYING A PATTERN"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP
|
.nf
|
||||||
.ti +5n
|
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||||
.B const char **\fIerrptr\fP);
|
.B " const char **\fIerrptr\fP);"
|
||||||
|
.fi
|
||||||
.PP
|
.PP
|
||||||
If a compiled pattern is going to be used several times, it is worth spending
|
If a compiled pattern is going to be used several times, it is worth spending
|
||||||
more time analyzing it in order to speed up the time taken for matching. The
|
more time analyzing it in order to speed up the time taken for matching. The
|
||||||
@ -1117,15 +1125,17 @@ below.
|
|||||||
.sp
|
.sp
|
||||||
PCRE handles caseless matching, and determines whether characters are letters,
|
PCRE handles caseless matching, and determines whether characters are letters,
|
||||||
digits, or whatever, by reference to a set of tables, indexed by character
|
digits, or whatever, by reference to a set of tables, indexed by character
|
||||||
value. When running in UTF-8 mode, this applies only to characters
|
code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this
|
||||||
with codes less than 128. By default, higher-valued codes never match escapes
|
applies only to characters with code points less than 256. By default,
|
||||||
such as \ew or \ed, but they can be tested with \ep if PCRE is built with
|
higher-valued code points never match escapes such as \ew or \ed. However, if
|
||||||
Unicode character property support. Alternatively, the PCRE_UCP option can be
|
PCRE is built with Unicode property support, all characters can be tested with
|
||||||
set at compile time; this causes \ew and friends to use Unicode property
|
\ep and \eP, or, alternatively, the PCRE_UCP option can be set when a pattern
|
||||||
support instead of built-in tables. The use of locales with Unicode is
|
is compiled; this causes \ew and friends to use Unicode property support
|
||||||
discouraged. If you are handling characters with codes greater than 128, you
|
instead of the built-in tables.
|
||||||
should either use UTF-8 and Unicode, or use locales, but not try to mix the
|
.P
|
||||||
two.
|
The use of locales with Unicode is discouraged. If you are handling characters
|
||||||
|
with code points greater than 128, you should either use Unicode support, or
|
||||||
|
use locales, but not try to mix the two.
|
||||||
.P
|
.P
|
||||||
PCRE contains an internal set of tables that are used when the final argument
|
PCRE contains an internal set of tables that are used when the final argument
|
||||||
of \fBpcre_compile()\fP is NULL. These are sufficient for many applications.
|
of \fBpcre_compile()\fP is NULL. These are sufficient for many applications.
|
||||||
@ -1140,10 +1150,10 @@ for this locale support is expected to die away.
|
|||||||
.P
|
.P
|
||||||
External tables are built by calling the \fBpcre_maketables()\fP function,
|
External tables are built by calling the \fBpcre_maketables()\fP function,
|
||||||
which has no arguments, in the relevant locale. The result can then be passed
|
which has no arguments, in the relevant locale. The result can then be passed
|
||||||
to \fBpcre_compile()\fP or \fBpcre_exec()\fP as often as necessary. For
|
to \fBpcre_compile()\fP as often as necessary. For example, to build and use
|
||||||
example, to build and use tables that are appropriate for the French locale
|
tables that are appropriate for the French locale (where accented characters
|
||||||
(where accented characters with values greater than 128 are treated as letters),
|
with values greater than 128 are treated as letters), the following code could
|
||||||
the following code could be used:
|
be used:
|
||||||
.sp
|
.sp
|
||||||
setlocale(LC_CTYPE, "fr_FR");
|
setlocale(LC_CTYPE, "fr_FR");
|
||||||
tables = pcre_maketables();
|
tables = pcre_maketables();
|
||||||
@ -1159,24 +1169,29 @@ needed.
|
|||||||
.P
|
.P
|
||||||
The pointer that is passed to \fBpcre_compile()\fP is saved with the compiled
|
The pointer that is passed to \fBpcre_compile()\fP is saved with the compiled
|
||||||
pattern, and the same tables are used via this pointer by \fBpcre_study()\fP
|
pattern, and the same tables are used via this pointer by \fBpcre_study()\fP
|
||||||
and normally also by \fBpcre_exec()\fP. Thus, by default, for any single
|
and also by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP. Thus, for any single
|
||||||
pattern, compilation, studying and matching all happen in the same locale, but
|
pattern, compilation, studying and matching all happen in the same locale, but
|
||||||
different patterns can be compiled in different locales.
|
different patterns can be processed in different locales.
|
||||||
.P
|
.P
|
||||||
It is possible to pass a table pointer or NULL (indicating the use of the
|
It is possible to pass a table pointer or NULL (indicating the use of the
|
||||||
internal tables) to \fBpcre_exec()\fP. Although not intended for this purpose,
|
internal tables) to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP (see the
|
||||||
this facility could be used to match a pattern in a different locale from the
|
discussion below in the section on matching a pattern). This facility is
|
||||||
one in which it was compiled. Passing table pointers at run time is discussed
|
provided for use with pre-compiled patterns that have been saved and reloaded.
|
||||||
below in the section on matching a pattern.
|
Character tables are not saved with patterns, so if a non-standard table was
|
||||||
|
used at compile time, it must be provided again when the reloaded pattern is
|
||||||
|
matched. Attempting to use this facility to match a pattern in a different
|
||||||
|
locale from the one in which it was compiled is likely to lead to anomalous
|
||||||
|
(usually incorrect) results.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" HTML <a name="infoaboutpattern"></a>
|
.\" HTML <a name="infoaboutpattern"></a>
|
||||||
.SH "INFORMATION ABOUT A PATTERN"
|
.SH "INFORMATION ABOUT A PATTERN"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
.fi
|
||||||
.PP
|
.PP
|
||||||
The \fBpcre_fullinfo()\fP function returns information about a compiled
|
The \fBpcre_fullinfo()\fP function returns information about a compiled
|
||||||
pattern. It replaces the \fBpcre_info()\fP function, which was removed from the
|
pattern. It replaces the \fBpcre_info()\fP function, which was removed from the
|
||||||
@ -1310,9 +1325,14 @@ only if it follows something of variable length. For example, for the pattern
|
|||||||
is -1.
|
is -1.
|
||||||
.P
|
.P
|
||||||
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
||||||
to return the full 32-bit range of the character, this value is deprecated;
|
to return the full 32-bit range of characters, this value is deprecated;
|
||||||
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
||||||
be used.
|
be used.
|
||||||
|
.sp
|
||||||
|
PCRE_INFO_MATCH_EMPTY
|
||||||
|
.sp
|
||||||
|
Return 1 if the pattern can match an empty string, otherwise 0. The fourth
|
||||||
|
argument should point to an \fBint\fP variable.
|
||||||
.sp
|
.sp
|
||||||
PCRE_INFO_MATCHLIMIT
|
PCRE_INFO_MATCHLIMIT
|
||||||
.sp
|
.sp
|
||||||
@ -1369,8 +1389,8 @@ most significant byte first. In the 16-bit library, the pointer points to
|
|||||||
contains the parenthesis number. The rest of the entry is the corresponding
|
contains the parenthesis number. The rest of the entry is the corresponding
|
||||||
name, zero terminated.
|
name, zero terminated.
|
||||||
.P
|
.P
|
||||||
The names are in alphabetical order. Duplicate names may appear if (?| is used
|
The names are in alphabetical order. If (?| is used to create multiple groups
|
||||||
to create multiple groups with the same number, as described in the
|
with the same number, as described in the
|
||||||
.\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
|
.\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
section on duplicate subpattern numbers
|
section on duplicate subpattern numbers
|
||||||
@ -1379,11 +1399,13 @@ in the
|
|||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrepattern\fP
|
\fBpcrepattern\fP
|
||||||
.\"
|
.\"
|
||||||
page. Duplicate names for subpatterns with different numbers are permitted only
|
page, the groups may be given the same name, but there is only one entry in the
|
||||||
if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
|
table. Different names for groups of the same number are not permitted.
|
||||||
table in the order in which they were found in the pattern. In the absence of
|
Duplicate names for subpatterns with different numbers are permitted,
|
||||||
(?| this is the order of increasing number; when (?| is used this is not
|
but only if PCRE_DUPNAMES is set. They appear in the table in the order in
|
||||||
necessarily the case because later subpatterns may have lower numbers.
|
which they were found in the pattern. In the absence of (?| this is the order
|
||||||
|
of increasing number; when (?| is used this is not necessarily the case because
|
||||||
|
later subpatterns may have lower numbers.
|
||||||
.P
|
.P
|
||||||
As a simple example of the name/number table, consider the following pattern
|
As a simple example of the name/number table, consider the following pattern
|
||||||
after compilation by the 8-bit library (assume PCRE_EXTENDED is set, so white
|
after compilation by the 8-bit library (assume PCRE_EXTENDED is set, so white
|
||||||
@ -1501,25 +1523,13 @@ returned. For anchored patterns, 0 is returned.
|
|||||||
.sp
|
.sp
|
||||||
PCRE_INFO_FIRSTCHARACTER
|
PCRE_INFO_FIRSTCHARACTER
|
||||||
.sp
|
.sp
|
||||||
Return the fixed first character value, if PCRE_INFO_FIRSTCHARACTERFLAGS
|
Return the fixed first character value in the situation where
|
||||||
returned 1; otherwise returns 0. The fourth argument should point to an
|
PCRE_INFO_FIRSTCHARACTERFLAGS returns 1; otherwise return 0. The fourth
|
||||||
\fBuint_t\fP variable.
|
argument should point to a \fBuint_t\fP variable.
|
||||||
.P
|
.P
|
||||||
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
||||||
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
||||||
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
||||||
.P
|
|
||||||
If there is no fixed first value, and if either
|
|
||||||
.sp
|
|
||||||
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
|
|
||||||
starts with "^", or
|
|
||||||
.sp
|
|
||||||
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
|
|
||||||
(if it were set, the pattern would be anchored),
|
|
||||||
.sp
|
|
||||||
-1 is returned, indicating that the pattern matches only at the start of a
|
|
||||||
subject string or after any newline within the string. Otherwise -2 is
|
|
||||||
returned. For anchored patterns, -2 is returned.
|
|
||||||
.sp
|
.sp
|
||||||
PCRE_INFO_REQUIREDCHARFLAGS
|
PCRE_INFO_REQUIREDCHARFLAGS
|
||||||
.sp
|
.sp
|
||||||
@ -1567,11 +1577,11 @@ is different. (This seems a highly unlikely scenario.)
|
|||||||
.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
|
.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||||
.ti +5n
|
.fi
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
|
||||||
.P
|
.P
|
||||||
The function \fBpcre_exec()\fP is called to match a subject string against a
|
The function \fBpcre_exec()\fP is called to match a subject string against a
|
||||||
compiled pattern, which is passed in the \fIcode\fP argument. If the
|
compiled pattern, which is passed in the \fIcode\fP argument. If the
|
||||||
@ -1724,19 +1734,23 @@ and is described in the
|
|||||||
.\"
|
.\"
|
||||||
documentation.
|
documentation.
|
||||||
.P
|
.P
|
||||||
The \fItables\fP field is used to pass a character tables pointer to
|
The \fItables\fP field is provided for use with patterns that have been
|
||||||
\fBpcre_exec()\fP; this overrides the value that is stored with the compiled
|
pre-compiled using custom character tables, saved to disc or elsewhere, and
|
||||||
pattern. A non-NULL value is stored with the compiled pattern only if custom
|
then reloaded, because the tables that were used to compile a pattern are not
|
||||||
tables were supplied to \fBpcre_compile()\fP via its \fItableptr\fP argument.
|
saved with it. See the
|
||||||
If NULL is passed to \fBpcre_exec()\fP using this mechanism, it forces PCRE's
|
|
||||||
internal tables to be used. This facility is helpful when re-using patterns
|
|
||||||
that have been saved after compiling with an external set of tables, because
|
|
||||||
the external tables might be at a different address when \fBpcre_exec()\fP is
|
|
||||||
called. See the
|
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcreprecompile\fP
|
\fBpcreprecompile\fP
|
||||||
.\"
|
.\"
|
||||||
documentation for a discussion of saving compiled patterns for later use.
|
documentation for a discussion of saving compiled patterns for later use. If
|
||||||
|
NULL is passed using this mechanism, it forces PCRE's internal tables to be
|
||||||
|
used.
|
||||||
|
.P
|
||||||
|
\fBWarning:\fP The tables that \fBpcre_exec()\fP uses must be the same as those
|
||||||
|
that were used when the pattern was compiled. If this is not the case, the
|
||||||
|
behaviour of \fBpcre_exec()\fP is undefined. Therefore, when a pattern is
|
||||||
|
compiled and matched in the same process, this field should never be set. In
|
||||||
|
this (the most common) case, the correct table pointer is automatically passed
|
||||||
|
with the compiled pattern from \fBpcre_compile()\fP to \fBpcre_exec()\fP.
|
||||||
.P
|
.P
|
||||||
If PCRE_EXTRA_MARK is set in the \fIflags\fP field, the \fImark\fP field must
|
If PCRE_EXTRA_MARK is set in the \fIflags\fP field, the \fImark\fP field must
|
||||||
be set to point to a suitable variable. If the pattern contains any
|
be set to point to a suitable variable. If the pattern contains any
|
||||||
@ -1960,7 +1974,7 @@ all the matches in a single subject string. However, you should be sure that
|
|||||||
the value of \fIstartoffset\fP points to the start of a character (or the end
|
the value of \fIstartoffset\fP points to the start of a character (or the end
|
||||||
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
||||||
invalid string as a subject or an invalid value of \fIstartoffset\fP is
|
invalid string as a subject or an invalid value of \fIstartoffset\fP is
|
||||||
undefined. Your program may crash.
|
undefined. Your program may crash or loop.
|
||||||
.sp
|
.sp
|
||||||
PCRE_PARTIAL_HARD
|
PCRE_PARTIAL_HARD
|
||||||
PCRE_PARTIAL_SOFT
|
PCRE_PARTIAL_SOFT
|
||||||
@ -2423,21 +2437,18 @@ no longer in use and is never returned.
|
|||||||
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
|
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
.B " int \fIbuffersize\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||||
.ti +5n
|
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
.B " const char **\fIstringptr\fP);"
|
||||||
.ti +5n
|
.sp
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||||
.ti +5n
|
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
.fi
|
||||||
.PP
|
.PP
|
||||||
Captured substrings can be accessed directly by using the offsets returned by
|
Captured substrings can be accessed directly by using the offsets returned by
|
||||||
\fBpcre_exec()\fP in \fIovector\fP. For convenience, the functions
|
\fBpcre_exec()\fP in \fIovector\fP. For convenience, the functions
|
||||||
@ -2516,25 +2527,20 @@ provided.
|
|||||||
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME"
|
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP);"
|
||||||
.B const char *\fIname\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
|
||||||
.PP
|
|
||||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||||
.ti +5n
|
.B " const char **\fIstringptr\fP);"
|
||||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B const char **\fIstringptr\fP);
|
|
||||||
.PP
|
.PP
|
||||||
To extract a substring by name, you first have to find the associated number.
|
To extract a substring by name, you first have to find the associated number.
|
||||||
For example, for this pattern
|
For example, for this pattern
|
||||||
@ -2586,9 +2592,10 @@ same number causes an error at compile time.
|
|||||||
.SH "DUPLICATE SUBPATTERN NAMES"
|
.SH "DUPLICATE SUBPATTERN NAMES"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||||
.ti +5n
|
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
.fi
|
||||||
.PP
|
.PP
|
||||||
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
|
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
|
||||||
are not required to be unique. (Duplicate names are always allowed for
|
are not required to be unique. (Duplicate names are always allowed for
|
||||||
@ -2677,13 +2684,12 @@ the value returned is the size of each block that is obtained from the heap.
|
|||||||
.SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION"
|
.SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||||
.ti +5n
|
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||||
.ti +5n
|
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
.fi
|
||||||
.ti +5n
|
|
||||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
|
||||||
.P
|
.P
|
||||||
The function \fBpcre_dfa_exec()\fP is called to match a subject string against
|
The function \fBpcre_dfa_exec()\fP is called to match a subject string against
|
||||||
a compiled pattern, using a matching algorithm that scans the subject string
|
a compiled pattern, using a matching algorithm that scans the subject string
|
||||||
@ -2810,6 +2816,14 @@ matching string is given first. If there were too many matches to fit into
|
|||||||
\fIovector\fP, the yield of the function is zero, and the vector is filled with
|
\fIovector\fP, the yield of the function is zero, and the vector is filled with
|
||||||
the longest matches. Unlike \fBpcre_exec()\fP, \fBpcre_dfa_exec()\fP can use
|
the longest matches. Unlike \fBpcre_exec()\fP, \fBpcre_dfa_exec()\fP can use
|
||||||
the entire \fIovector\fP for returning matched strings.
|
the entire \fIovector\fP for returning matched strings.
|
||||||
|
.P
|
||||||
|
NOTE: PCRE's "auto-possessification" optimization usually applies to character
|
||||||
|
repeats at the end of a pattern (as well as internally). For example, the
|
||||||
|
pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point
|
||||||
|
even considering the possibility of backtracking into the repeated digits. For
|
||||||
|
DFA matching, this means that only one possible match is found. If you really
|
||||||
|
do want multiple matches in such cases, either use an ungreedy repeat
|
||||||
|
("a\ed+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Error returns from \fBpcre_dfa_exec()\fP"
|
.SS "Error returns from \fBpcre_dfa_exec()\fP"
|
||||||
@ -2886,6 +2900,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 12 May 2013
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRECALLOUT 3 "03 March 2013" "PCRE 8.33"
|
.TH PCRECALLOUT 3 "12 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -55,17 +55,50 @@ The
|
|||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcretest\fP
|
\fBpcretest\fP
|
||||||
.\"
|
.\"
|
||||||
command has an option that sets automatic callouts; when it is used, the output
|
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||||
indicates how the pattern is matched. This is useful information when you are
|
used, the output indicates how the pattern is being matched. This is useful
|
||||||
trying to optimize the performance of a particular pattern.
|
information when you are trying to optimize the performance of a particular
|
||||||
|
pattern.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "MISSING CALLOUTS"
|
.SH "MISSING CALLOUTS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
You should be aware that, because of optimizations in the way PCRE matches
|
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||||
patterns by default, callouts sometimes do not happen. For example, if the
|
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||||
pattern is
|
.P
|
||||||
|
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||||
|
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||||
|
if it were a++[bc]. The \fBpcretest\fP output when this pattern is anchored and
|
||||||
|
then applied with automatic callouts to the string "aaaa" is:
|
||||||
|
.sp
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
No match
|
||||||
|
.sp
|
||||||
|
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||||
|
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||||
|
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||||
|
to \fBpcre_compile()\fP, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||||
|
this is done in \fBpcretest\fP (using the /O qualifier), the output changes to
|
||||||
|
this:
|
||||||
|
.sp
|
||||||
|
--->aaaa
|
||||||
|
+0 ^ ^
|
||||||
|
+1 ^ a+
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^ ^ [bc]
|
||||||
|
+3 ^^ [bc]
|
||||||
|
No match
|
||||||
|
.sp
|
||||||
|
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||||
|
again, repeatedly, until a+ itself fails.
|
||||||
|
.P
|
||||||
|
Other optimizations that provide fast "no match" results also affect callouts.
|
||||||
|
For example, if the pattern is
|
||||||
.sp
|
.sp
|
||||||
ab(?C4)cd
|
ab(?C4)cd
|
||||||
.sp
|
.sp
|
||||||
@ -89,11 +122,11 @@ callouts such as the example above are obeyed.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
During matching, when PCRE reaches a callout point, the external function
|
During matching, when PCRE reaches a callout point, the external function
|
||||||
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called
|
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called (if it is
|
||||||
(if it is set). This applies to both normal and DFA matching. The only
|
set). This applies to both normal and DFA matching. The only argument to the
|
||||||
argument to the callout function is a pointer to a \fBpcre_callout\fP
|
callout function is a pointer to a \fBpcre_callout\fP or
|
||||||
or \fBpcre[16|32]_callout\fP block.
|
\fBpcre[16|32]_callout\fP block. These structures contain the following
|
||||||
These structures contains the following fields:
|
fields:
|
||||||
.sp
|
.sp
|
||||||
int \fIversion\fP;
|
int \fIversion\fP;
|
||||||
int \fIcallout_number\fP;
|
int \fIcallout_number\fP;
|
||||||
@ -217,6 +250,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 03 March 2013
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRECOMPAT 3 "19 March 2013" "PCRE 8.33"
|
.TH PCRECOMPAT 3 "10 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "DIFFERENCES BETWEEN PCRE AND PERL"
|
.SH "DIFFERENCES BETWEEN PCRE AND PERL"
|
||||||
@ -122,16 +122,21 @@ an error is given at compile time.
|
|||||||
.P
|
.P
|
||||||
15. Perl recognizes comments in some places that PCRE does not, for example,
|
15. Perl recognizes comments in some places that PCRE does not, for example,
|
||||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||||
Perl allows white space between ( and ? but PCRE never does, even if the
|
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||||
PCRE_EXTENDED option is set.
|
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
|
||||||
.P
|
.P
|
||||||
16. In PCRE, the upper/lower case character properties Lu and Ll are not
|
16. Perl, when in warning mode, gives warnings for character classes such as
|
||||||
|
[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
|
||||||
|
warning features, so it gives an error in these cases because they are almost
|
||||||
|
certainly user mistakes.
|
||||||
|
.P
|
||||||
|
17. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||||
affected when case-independent matching is specified. For example, \ep{Lu}
|
affected when case-independent matching is specified. For example, \ep{Lu}
|
||||||
always matches an upper case letter. I think Perl has changed in this respect;
|
always matches an upper case letter. I think Perl has changed in this respect;
|
||||||
in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all
|
in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all
|
||||||
letters, regardless of case, when case independence is specified.
|
letters, regardless of case, when case independence is specified.
|
||||||
.P
|
.P
|
||||||
17. PCRE provides some extensions to the Perl regular expression facilities.
|
18. PCRE provides some extensions to the Perl regular expression facilities.
|
||||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||||
of which (such as named parentheses) have been in PCRE for some time. This list
|
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||||
is with respect to Perl 5.10:
|
is with respect to Perl 5.10:
|
||||||
@ -190,6 +195,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 19 March 2013
|
Last updated: 10 November 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRELIMITS 3 "24 June 2012" "PCRE 8.30"
|
.TH PCRELIMITS 3 "05 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "SIZE AND OTHER LIMITATIONS"
|
.SH "SIZE AND OTHER LIMITATIONS"
|
||||||
@ -8,9 +8,10 @@ There are some size limitations in PCRE but it is hoped that they will never in
|
|||||||
practice be relevant.
|
practice be relevant.
|
||||||
.P
|
.P
|
||||||
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
||||||
for the 8-bit library, 32-bit units for the 32-bit library, and 32-bit units for
|
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
|
||||||
the 32-bit library) if PCRE is compiled with the default internal linkage size
|
the 32-bit library) if PCRE is compiled with the default internal linkage size,
|
||||||
of 2 bytes. If you want to process regular expressions that are truly enormous,
|
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
|
||||||
|
library. If you want to process regular expressions that are truly enormous,
|
||||||
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
||||||
16-bit or 32-bit library, 3 is rounded up to 4). See the \fBREADME\fP file in
|
16-bit or 32-bit library, 3 is rounded up to 4). See the \fBREADME\fP file in
|
||||||
the source distribution and the
|
the source distribution and the
|
||||||
@ -23,7 +24,10 @@ However, the speed of execution is slower.
|
|||||||
All values in repeating quantifiers must be less than 65536.
|
All values in repeating quantifiers must be less than 65536.
|
||||||
.P
|
.P
|
||||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||||
no more than 65535 capturing subpatterns.
|
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||||
|
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||||
|
order to limit the amount of system stack used at compile time. The limit can
|
||||||
|
be specified when PCRE is built; the default is 250.
|
||||||
.P
|
.P
|
||||||
There is a limit to the number of forward references to subsequent subpatterns
|
There is a limit to the number of forward references to subsequent subpatterns
|
||||||
of around 200,000. Repeated forward references with fixed upper limits, for
|
of around 200,000. Repeated forward references with fixed upper limits, for
|
||||||
@ -34,7 +38,7 @@ The maximum length of name for a named subpattern is 32 characters, and the
|
|||||||
maximum number of named subpatterns is 10000.
|
maximum number of named subpatterns is 10000.
|
||||||
.P
|
.P
|
||||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit library.
|
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||||
.P
|
.P
|
||||||
The maximum length of a subject string is the largest positive number that an
|
The maximum length of a subject string is the largest positive number that an
|
||||||
integer variable can hold. However, when using the traditional matching
|
integer variable can hold. However, when using the traditional matching
|
||||||
@ -62,6 +66,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 04 May 2012
|
Last updated: 05 November 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCREMATCHING 3 "08 January 2012" "PCRE 8.30"
|
.TH PCREMATCHING 3 "12 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "PCRE MATCHING ALGORITHMS"
|
.SH "PCRE MATCHING ALGORITHMS"
|
||||||
@ -106,6 +106,14 @@ the three strings "caterpillar", "cater", and "cat" that start at the fifth
|
|||||||
character of the subject. The algorithm does not automatically move on to find
|
character of the subject. The algorithm does not automatically move on to find
|
||||||
matches that start at later positions.
|
matches that start at later positions.
|
||||||
.P
|
.P
|
||||||
|
PCRE's "auto-possessification" optimization usually applies to character
|
||||||
|
repeats at the end of a pattern (as well as internally). For example, the
|
||||||
|
pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point
|
||||||
|
even considering the possibility of backtracking into the repeated digits. For
|
||||||
|
DFA matching, this means that only one possible match is found. If you really
|
||||||
|
do want multiple matches in such cases, either use an ungreedy repeat
|
||||||
|
("a\ed+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||||
|
.P
|
||||||
There are a number of features of PCRE regular expressions that are not
|
There are a number of features of PCRE regular expressions that are not
|
||||||
supported by the alternative matching algorithm. They are as follows:
|
supported by the alternative matching algorithm. They are as follows:
|
||||||
.P
|
.P
|
||||||
@ -201,6 +209,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 08 January 2012
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2012 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCREPARTIAL 3 "20 February 2013" "PCRE 8.33"
|
.TH PCREPARTIAL 3 "02 July 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "PARTIAL MATCHING IN PCRE"
|
.SH "PARTIAL MATCHING IN PCRE"
|
||||||
@ -280,6 +280,15 @@ Notice that when the match is complete, only the last part is shown; PCRE does
|
|||||||
not retain the previously partially-matched string. It is up to the calling
|
not retain the previously partially-matched string. It is up to the calling
|
||||||
program to do that if it needs to.
|
program to do that if it needs to.
|
||||||
.P
|
.P
|
||||||
|
That means that, for an unanchored pattern, if a continued match fails, it is
|
||||||
|
not possible to try again at a new starting point. All this facility is capable
|
||||||
|
of doing is continuing with the previous match attempt. In the previous
|
||||||
|
example, if the second set of data is "ug23" the result is no match, even
|
||||||
|
though there would be a match for "aug23" if the entire string were given at
|
||||||
|
once. Depending on the application, this may or may not be what you want.
|
||||||
|
The only way to allow for starting again at the next character is to retain the
|
||||||
|
matched part of the subject and try a new complete match.
|
||||||
|
.P
|
||||||
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
||||||
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
||||||
facility can be used to pass very long subject strings to the DFA matching
|
facility can be used to pass very long subject strings to the DFA matching
|
||||||
@ -462,6 +471,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 20 February 2013
|
Last updated: 02 July 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCREPATTERN 3 "26 April 2013" "PCRE 8.33"
|
.TH PCREPATTERN 3 "03 December 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "PCRE REGULAR EXPRESSION DETAILS"
|
.SH "PCRE REGULAR EXPRESSION DETAILS"
|
||||||
@ -80,21 +80,37 @@ appearance causes an error.
|
|||||||
.SS "Unicode property support"
|
.SS "Unicode property support"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
Another special sequence that may appear at the start of a pattern is
|
Another special sequence that may appear at the start of a pattern is (*UCP).
|
||||||
.sp
|
|
||||||
(*UCP)
|
|
||||||
.sp
|
|
||||||
This has the same effect as setting the PCRE_UCP option: it causes sequences
|
This has the same effect as setting the PCRE_UCP option: it causes sequences
|
||||||
such as \ed and \ew to use Unicode properties to determine character types,
|
such as \ed and \ew to use Unicode properties to determine character types,
|
||||||
instead of recognizing only characters with codes less than 128 via a lookup
|
instead of recognizing only characters with codes less than 128 via a lookup
|
||||||
table.
|
table.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SS "Disabling auto-possessification"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting
|
||||||
|
the PCRE_NO_AUTO_POSSESS option at compile time. This stops PCRE from making
|
||||||
|
quantifiers possessive when what follows cannot match the repeated item. For
|
||||||
|
example, by default a+b is treated as a++b. For more details, see the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreapi\fP
|
||||||
|
.\"
|
||||||
|
documentation.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SS "Disabling start-up optimizations"
|
.SS "Disabling start-up optimizations"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
If a pattern starts with (*NO_START_OPT), it has the same effect as setting the
|
If a pattern starts with (*NO_START_OPT), it has the same effect as setting the
|
||||||
PCRE_NO_START_OPTIMIZE option either at compile or matching time.
|
PCRE_NO_START_OPTIMIZE option either at compile or matching time. This disables
|
||||||
|
several optimizations for quickly reaching "no match" results. For more
|
||||||
|
details, see the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcreapi\fP
|
||||||
|
.\"
|
||||||
|
documentation.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" HTML <a name="newlines"></a>
|
.\" HTML <a name="newlines"></a>
|
||||||
@ -164,10 +180,10 @@ pattern of the form
|
|||||||
(*LIMIT_RECURSION=d)
|
(*LIMIT_RECURSION=d)
|
||||||
.sp
|
.sp
|
||||||
where d is any number of decimal digits. However, the value of the setting must
|
where d is any number of decimal digits. However, the value of the setting must
|
||||||
be less than the value set by the caller of \fBpcre_exec()\fP for it to have
|
be less than the value set (or defaulted) by the caller of \fBpcre_exec()\fP
|
||||||
any effect. In other words, the pattern writer can lower the limit set by the
|
for it to have any effect. In other words, the pattern writer can lower the
|
||||||
programmer, but not raise it. If there is more than one setting of one of these
|
limits set by the programmer, but not raise them. If there is more than one
|
||||||
limits, the lower value is used.
|
setting of one of these limits, the lower value is used.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "EBCDIC CHARACTER CODES"
|
.SH "EBCDIC CHARACTER CODES"
|
||||||
@ -257,10 +273,11 @@ In a UTF mode, only ASCII numbers and letters have any special meaning after a
|
|||||||
backslash. All other characters (in particular, those whose codepoints are
|
backslash. All other characters (in particular, those whose codepoints are
|
||||||
greater than 127) are treated as literals.
|
greater than 127) are treated as literals.
|
||||||
.P
|
.P
|
||||||
If a pattern is compiled with the PCRE_EXTENDED option, white space in the
|
If a pattern is compiled with the PCRE_EXTENDED option, most white space in the
|
||||||
pattern (other than in a character class) and characters between a # outside
|
pattern (other than in a character class), and characters between a # outside a
|
||||||
a character class and the next newline are ignored. An escaping backslash can
|
character class and the next newline, inclusive, are ignored. An escaping
|
||||||
be used to include a white space or # character as part of the pattern.
|
backslash can be used to include a white space or # character as part of the
|
||||||
|
pattern.
|
||||||
.P
|
.P
|
||||||
If you want to remove the special meaning from a sequence of characters, you
|
If you want to remove the special meaning from a sequence of characters, you
|
||||||
can do so by putting them between \eQ and \eE. This is different from Perl in
|
can do so by putting them between \eQ and \eE. This is different from Perl in
|
||||||
@ -300,7 +317,9 @@ one of the following escape sequences than the binary character it represents:
|
|||||||
\en linefeed (hex 0A)
|
\en linefeed (hex 0A)
|
||||||
\er carriage return (hex 0D)
|
\er carriage return (hex 0D)
|
||||||
\et tab (hex 09)
|
\et tab (hex 09)
|
||||||
|
\e0dd character with octal code 0dd
|
||||||
\eddd character with octal code ddd, or back reference
|
\eddd character with octal code ddd, or back reference
|
||||||
|
\eo{ddd..} character with octal code ddd..
|
||||||
\exhh character with hex code hh
|
\exhh character with hex code hh
|
||||||
\ex{hhh..} character with hex code hhh.. (non-JavaScript mode)
|
\ex{hhh..} character with hex code hhh.. (non-JavaScript mode)
|
||||||
\euhhhh character with hex code hhhh (JavaScript mode only)
|
\euhhhh character with hex code hhhh (JavaScript mode only)
|
||||||
@ -321,47 +340,27 @@ byte are inverted. Thus \ecA becomes hex 01, as in ASCII (A is C1), but because
|
|||||||
the EBCDIC letters are disjoint, \ecZ becomes hex 29 (Z is E9), and other
|
the EBCDIC letters are disjoint, \ecZ becomes hex 29 (Z is E9), and other
|
||||||
characters also generate different values.
|
characters also generate different values.
|
||||||
.P
|
.P
|
||||||
By default, after \ex, from zero to two hexadecimal digits are read (letters
|
|
||||||
can be in upper or lower case). Any number of hexadecimal digits may appear
|
|
||||||
between \ex{ and }, but the character code is constrained as follows:
|
|
||||||
.sp
|
|
||||||
8-bit non-UTF mode less than 0x100
|
|
||||||
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
|
||||||
16-bit non-UTF mode less than 0x10000
|
|
||||||
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
|
||||||
32-bit non-UTF mode less than 0x80000000
|
|
||||||
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
|
||||||
.sp
|
|
||||||
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
|
||||||
"surrogate" codepoints), and 0xffef.
|
|
||||||
.P
|
|
||||||
If characters other than hexadecimal digits appear between \ex{ and }, or if
|
|
||||||
there is no terminating }, this form of escape is not recognized. Instead, the
|
|
||||||
initial \ex will be interpreted as a basic hexadecimal escape, with no
|
|
||||||
following digits, giving a character whose value is zero.
|
|
||||||
.P
|
|
||||||
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \ex is
|
|
||||||
as just described only when it is followed by two hexadecimal digits.
|
|
||||||
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
|
||||||
code points greater than 256 is provided by \eu, which must be followed by
|
|
||||||
four hexadecimal digits; otherwise it matches a literal "u" character.
|
|
||||||
Character codes specified by \eu in JavaScript mode are constrained in the same
|
|
||||||
was as those specified by \ex in non-JavaScript mode.
|
|
||||||
.P
|
|
||||||
Characters whose value is less than 256 can be defined by either of the two
|
|
||||||
syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
|
|
||||||
way they are handled. For example, \exdc is exactly the same as \ex{dc} (or
|
|
||||||
\eu00dc in JavaScript mode).
|
|
||||||
.P
|
|
||||||
After \e0 up to two further octal digits are read. If there are fewer than two
|
After \e0 up to two further octal digits are read. If there are fewer than two
|
||||||
digits, just those that are present are used. Thus the sequence \e0\ex\e07
|
digits, just those that are present are used. Thus the sequence \e0\ex\e07
|
||||||
specifies two binary zeros followed by a BEL character (code value 7). Make
|
specifies two binary zeros followed by a BEL character (code value 7). Make
|
||||||
sure you supply two digits after the initial zero if the pattern character that
|
sure you supply two digits after the initial zero if the pattern character that
|
||||||
follows is itself an octal digit.
|
follows is itself an octal digit.
|
||||||
.P
|
.P
|
||||||
The handling of a backslash followed by a digit other than 0 is complicated.
|
The escape \eo must be followed by a sequence of octal digits, enclosed in
|
||||||
Outside a character class, PCRE reads it and any following digits as a decimal
|
braces. An error occurs if this is not the case. This escape is a recent
|
||||||
number. If the number is less than 10, or if there have been at least that many
|
addition to Perl; it provides a way of specifying character code points as octal
|
||||||
|
numbers greater than 0777, and it also allows octal numbers and back references
|
||||||
|
to be unambiguously specified.
|
||||||
|
.P
|
||||||
|
For greater clarity and unambiguity, it is best to avoid following \e by a
|
||||||
|
digit greater than zero. Instead, use \eo{} or \ex{} to specify character
|
||||||
|
numbers, and \eg{} to specify back references. The following paragraphs
|
||||||
|
describe the old, ambiguous syntax.
|
||||||
|
.P
|
||||||
|
The handling of a backslash followed by a digit other than 0 is complicated,
|
||||||
|
and Perl has changed in recent releases, causing PCRE also to change. Outside a
|
||||||
|
character class, PCRE reads the digit and any following digits as a decimal
|
||||||
|
number. If the number is less than 8, or if there have been at least that many
|
||||||
previous capturing left parentheses in the expression, the entire sequence is
|
previous capturing left parentheses in the expression, the entire sequence is
|
||||||
taken as a \fIback reference\fP. A description of how this works is given
|
taken as a \fIback reference\fP. A description of how this works is given
|
||||||
.\" HTML <a href="#backreferences">
|
.\" HTML <a href="#backreferences">
|
||||||
@ -374,12 +373,11 @@ following the discussion of
|
|||||||
parenthesized subpatterns.
|
parenthesized subpatterns.
|
||||||
.\"
|
.\"
|
||||||
.P
|
.P
|
||||||
Inside a character class, or if the decimal number is greater than 9 and there
|
Inside a character class, or if the decimal number following \e is greater than
|
||||||
have not been that many capturing subpatterns, PCRE re-reads up to three octal
|
7 and there have not been that many capturing subpatterns, PCRE handles \e8 and
|
||||||
digits following the backslash, and uses them to generate a data character. Any
|
\e9 as the literal characters "8" and "9", and otherwise re-reads up to three
|
||||||
subsequent digits stand for themselves. The value of the character is
|
octal digits following the backslash, using them to generate a data character.
|
||||||
constrained in the same way as characters specified in hexadecimal.
|
Any subsequent digits stand for themselves. For example:
|
||||||
For example:
|
|
||||||
.sp
|
.sp
|
||||||
\e040 is another way of writing an ASCII space
|
\e040 is another way of writing an ASCII space
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
@ -398,12 +396,51 @@ For example:
|
|||||||
\e377 might be a back reference, otherwise
|
\e377 might be a back reference, otherwise
|
||||||
the value 255 (decimal)
|
the value 255 (decimal)
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
\e81 is either a back reference, or a binary zero
|
\e81 is either a back reference, or the two
|
||||||
followed by the two characters "8" and "1"
|
characters "8" and "1"
|
||||||
.sp
|
.sp
|
||||||
Note that octal values of 100 or greater must not be introduced by a leading
|
Note that octal values of 100 or greater that are specified using this syntax
|
||||||
zero, because no more than three octal digits are ever read.
|
must not be introduced by a leading zero, because no more than three octal
|
||||||
|
digits are ever read.
|
||||||
.P
|
.P
|
||||||
|
By default, after \ex that is not followed by {, from zero to two hexadecimal
|
||||||
|
digits are read (letters can be in upper or lower case). Any number of
|
||||||
|
hexadecimal digits may appear between \ex{ and }. If a character other than
|
||||||
|
a hexadecimal digit appears between \ex{ and }, or if there is no terminating
|
||||||
|
}, an error occurs.
|
||||||
|
.P
|
||||||
|
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \ex is
|
||||||
|
as just described only when it is followed by two hexadecimal digits.
|
||||||
|
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
||||||
|
code points greater than 255 is provided by \eu, which must be followed by
|
||||||
|
four hexadecimal digits; otherwise it matches a literal "u" character.
|
||||||
|
.P
|
||||||
|
Characters whose value is less than 256 can be defined by either of the two
|
||||||
|
syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
|
||||||
|
way they are handled. For example, \exdc is exactly the same as \ex{dc} (or
|
||||||
|
\eu00dc in JavaScript mode).
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SS "Constraints on character values"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
Characters that are specified using octal or hexadecimal numbers are
|
||||||
|
limited to certain values, as follows:
|
||||||
|
.sp
|
||||||
|
8-bit non-UTF mode less than 0x100
|
||||||
|
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
||||||
|
16-bit non-UTF mode less than 0x10000
|
||||||
|
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
||||||
|
32-bit non-UTF mode less than 0x100000000
|
||||||
|
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
||||||
|
.sp
|
||||||
|
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
||||||
|
"surrogate" codepoints), and 0xffef.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SS "Escape sequences in character classes"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
All the sequences that define a single character value can be used both inside
|
All the sequences that define a single character value can be used both inside
|
||||||
and outside character classes. In addition, inside a character class, \eb is
|
and outside character classes. In addition, inside a character class, \eb is
|
||||||
interpreted as the backspace character (hex 08).
|
interpreted as the backspace character (hex 08).
|
||||||
@ -494,11 +531,14 @@ classes. They each match one character of the appropriate type. If the current
|
|||||||
matching point is at the end of the subject string, all of them fail, because
|
matching point is at the end of the subject string, all of them fail, because
|
||||||
there is no character to match.
|
there is no character to match.
|
||||||
.P
|
.P
|
||||||
For compatibility with Perl, \es does not match the VT character (code 11).
|
For compatibility with Perl, \es formerly did not match the VT character (code
|
||||||
This makes it different from the the POSIX "space" class. The \es characters
|
11), which made it different from the POSIX "space" class. However, Perl
|
||||||
are HT (9), LF (10), FF (12), CR (13), and space (32). If "use locale;" is
|
added VT at release 5.18, and PCRE followed suit at release 8.34. The default
|
||||||
included in a Perl script, \es may match the VT character. In PCRE, it never
|
\es characters are now HT (9), LF (10), VT (11), FF (12), CR (13), and space
|
||||||
does.
|
(32), which are defined as white space in the "C" locale. This list may vary if
|
||||||
|
locale-specific matching is taking place. For example, in some locales the
|
||||||
|
"non-breaking space" character (\exA0) is recognized as white space, and in
|
||||||
|
others the VT character is not.
|
||||||
.P
|
.P
|
||||||
A "word" character is an underscore or any character that is a letter or digit.
|
A "word" character is an underscore or any character that is a letter or digit.
|
||||||
By default, the definition of letters and digits is controlled by PCRE's
|
By default, the definition of letters and digits is controlled by PCRE's
|
||||||
@ -513,20 +553,22 @@ in the
|
|||||||
\fBpcreapi\fP
|
\fBpcreapi\fP
|
||||||
.\"
|
.\"
|
||||||
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
|
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
|
||||||
or "french" in Windows, some character codes greater than 128 are used for
|
or "french" in Windows, some character codes greater than 127 are used for
|
||||||
accented letters, and these are then matched by \ew. The use of locales with
|
accented letters, and these are then matched by \ew. The use of locales with
|
||||||
Unicode is discouraged.
|
Unicode is discouraged.
|
||||||
.P
|
.P
|
||||||
By default, in a UTF mode, characters with values greater than 128 never match
|
By default, characters whose code points are greater than 127 never match \ed,
|
||||||
\ed, \es, or \ew, and always match \eD, \eS, and \eW. These sequences retain
|
\es, or \ew, and always match \eD, \eS, and \eW, although this may vary for
|
||||||
their original meanings from before UTF support was available, mainly for
|
characters in the range 128-255 when locale-specific matching is happening.
|
||||||
efficiency reasons. However, if PCRE is compiled with Unicode property support,
|
These escape sequences retain their original meanings from before Unicode
|
||||||
and the PCRE_UCP option is set, the behaviour is changed so that Unicode
|
support was available, mainly for efficiency reasons. If PCRE is compiled with
|
||||||
properties are used to determine character types, as follows:
|
Unicode property support, and the PCRE_UCP option is set, the behaviour is
|
||||||
|
changed so that Unicode properties are used to determine character types, as
|
||||||
|
follows:
|
||||||
.sp
|
.sp
|
||||||
\ed any character that \ep{Nd} matches (decimal digit)
|
\ed any character that matches \ep{Nd} (decimal digit)
|
||||||
\es any character that \ep{Z} matches, plus HT, LF, FF, CR
|
\es any character that matches \ep{Z} or \eh or \ev
|
||||||
\ew any character that \ep{L} or \ep{N} matches, plus underscore
|
\ew any character that matches \ep{L} or \ep{N}, plus underscore
|
||||||
.sp
|
.sp
|
||||||
The upper case escapes match the inverse sets of characters. Note that \ed
|
The upper case escapes match the inverse sets of characters. Note that \ed
|
||||||
matches only decimal digits, whereas \ew matches any Unicode digit, as well as
|
matches only decimal digits, whereas \ew matches any Unicode digit, as well as
|
||||||
@ -536,7 +578,7 @@ is noticeably slower when PCRE_UCP is set.
|
|||||||
.P
|
.P
|
||||||
The sequences \eh, \eH, \ev, and \eV are features that were added to Perl at
|
The sequences \eh, \eH, \ev, and \eV are features that were added to Perl at
|
||||||
release 5.10. In contrast to the other sequences, which match only ASCII
|
release 5.10. In contrast to the other sequences, which match only ASCII
|
||||||
characters by default, these always match certain high-valued codepoints,
|
characters by default, these always match certain high-valued code points,
|
||||||
whether or not PCRE_UCP is set. The horizontal space characters are:
|
whether or not PCRE_UCP is set. The horizontal space characters are:
|
||||||
.sp
|
.sp
|
||||||
U+0009 Horizontal tab (HT)
|
U+0009 Horizontal tab (HT)
|
||||||
@ -906,9 +948,9 @@ the "mark" property always have the "extend" grapheme breaking property.
|
|||||||
.sp
|
.sp
|
||||||
As well as the standard Unicode properties described above, PCRE supports four
|
As well as the standard Unicode properties described above, PCRE supports four
|
||||||
more that make it possible to convert traditional escape sequences such as \ew
|
more that make it possible to convert traditional escape sequences such as \ew
|
||||||
and \es and POSIX character classes to use Unicode properties. PCRE uses these
|
and \es to use Unicode properties. PCRE uses these non-standard, non-Perl
|
||||||
non-standard, non-Perl properties internally when PCRE_UCP is set. However,
|
properties internally when PCRE_UCP is set. However, they may also be used
|
||||||
they may also be used explicitly. These properties are:
|
explicitly. These properties are:
|
||||||
.sp
|
.sp
|
||||||
Xan Any alphanumeric character
|
Xan Any alphanumeric character
|
||||||
Xps Any POSIX space character
|
Xps Any POSIX space character
|
||||||
@ -918,8 +960,9 @@ they may also be used explicitly. These properties are:
|
|||||||
Xan matches characters that have either the L (letter) or the N (number)
|
Xan matches characters that have either the L (letter) or the N (number)
|
||||||
property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
|
property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
|
||||||
carriage return, and any other character that has the Z (separator) property.
|
carriage return, and any other character that has the Z (separator) property.
|
||||||
Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
|
Xsp is the same as Xps; it used to exclude vertical tab, for Perl
|
||||||
same characters as Xan, plus underscore.
|
compatibility, but Perl changed, and so PCRE followed at release 8.34. Xwd
|
||||||
|
matches the same characters as Xan, plus underscore.
|
||||||
.P
|
.P
|
||||||
There is another non-standard property, Xuc, which matches any character that
|
There is another non-standard property, Xuc, which matches any character that
|
||||||
can be represented by a Universal Character Name in C++ and other programming
|
can be represented by a Universal Character Name in C++ and other programming
|
||||||
@ -1215,7 +1258,9 @@ The minus (hyphen) character can be used to specify a range of characters in a
|
|||||||
character class. For example, [d-m] matches any letter between d and m,
|
character class. For example, [d-m] matches any letter between d and m,
|
||||||
inclusive. If a minus character is required in a class, it must be escaped with
|
inclusive. If a minus character is required in a class, it must be escaped with
|
||||||
a backslash or appear in a position where it cannot be interpreted as
|
a backslash or appear in a position where it cannot be interpreted as
|
||||||
indicating a range, typically as the first or last character in the class.
|
indicating a range, typically as the first or last character in the class, or
|
||||||
|
immediately after a range. For example, [b-d-z] matches letters in the range b
|
||||||
|
to d, a hyphen character, or z.
|
||||||
.P
|
.P
|
||||||
It is not possible to have the literal character "]" as the end character of a
|
It is not possible to have the literal character "]" as the end character of a
|
||||||
range. A pattern such as [W-]46] is interpreted as a class of two characters
|
range. A pattern such as [W-]46] is interpreted as a class of two characters
|
||||||
@ -1225,6 +1270,11 @@ the end of range, so [W-\e]46] is interpreted as a class containing a range
|
|||||||
followed by two other characters. The octal or hexadecimal representation of
|
followed by two other characters. The octal or hexadecimal representation of
|
||||||
"]" can also be used to end a range.
|
"]" can also be used to end a range.
|
||||||
.P
|
.P
|
||||||
|
An error is generated if a POSIX character class (see below) or an escape
|
||||||
|
sequence other than one that defines a single character appears at a point
|
||||||
|
where a range ending character is expected. For example, [z-\exff] is valid,
|
||||||
|
but [A-\ed] and [A-[:digit:]] are not.
|
||||||
|
.P
|
||||||
Ranges operate in the collating sequence of character values. They can also be
|
Ranges operate in the collating sequence of character values. They can also be
|
||||||
used for characters specified numerically, for example [\e000-\e037]. Ranges
|
used for characters specified numerically, for example [\e000-\e037]. Ranges
|
||||||
can include any characters that are valid for the current mode.
|
can include any characters that are valid for the current mode.
|
||||||
@ -1263,9 +1313,9 @@ something AND NOT ...".
|
|||||||
The only metacharacters that are recognized in character classes are backslash,
|
The only metacharacters that are recognized in character classes are backslash,
|
||||||
hyphen (only where it can be interpreted as specifying a range), circumflex
|
hyphen (only where it can be interpreted as specifying a range), circumflex
|
||||||
(only at the start), opening square bracket (only when it can be interpreted as
|
(only at the start), opening square bracket (only when it can be interpreted as
|
||||||
introducing a POSIX class name - see the next section), and the terminating
|
introducing a POSIX class name, or for a special compatibility feature - see
|
||||||
closing square bracket. However, escaping other non-alphanumeric characters
|
the next two sections), and the terminating closing square bracket. However,
|
||||||
does no harm.
|
escaping other non-alphanumeric characters does no harm.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "POSIX CHARACTER CLASSES"
|
.SH "POSIX CHARACTER CLASSES"
|
||||||
@ -1290,15 +1340,17 @@ are:
|
|||||||
lower lower case letters
|
lower lower case letters
|
||||||
print printing characters, including space
|
print printing characters, including space
|
||||||
punct printing characters, excluding letters and digits and space
|
punct printing characters, excluding letters and digits and space
|
||||||
space white space (not quite the same as \es)
|
space white space (the same as \es from PCRE 8.34)
|
||||||
upper upper case letters
|
upper upper case letters
|
||||||
word "word" characters (same as \ew)
|
word "word" characters (same as \ew)
|
||||||
xdigit hexadecimal digits
|
xdigit hexadecimal digits
|
||||||
.sp
|
.sp
|
||||||
The "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), and
|
The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13),
|
||||||
space (32). Notice that this list includes the VT character (code 11). This
|
and space (32). If locale-specific matching is taking place, the list of space
|
||||||
makes "space" different to \es, which does not include VT (for Perl
|
characters may be different; there may be fewer or more of them. "Space" used
|
||||||
compatibility).
|
to be different to \es, which did not include VT, for Perl compatibility.
|
||||||
|
However, Perl changed at release 5.18, and PCRE followed at release 8.34.
|
||||||
|
"Space" and \es now match the same set of characters.
|
||||||
.P
|
.P
|
||||||
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
|
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
|
||||||
5.8. Another Perl extension is negation, which is indicated by a ^ character
|
5.8. Another Perl extension is negation, which is indicated by a ^ character
|
||||||
@ -1310,11 +1362,11 @@ matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
|
|||||||
syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
|
syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
|
||||||
supported, and an error is given if they are encountered.
|
supported, and an error is given if they are encountered.
|
||||||
.P
|
.P
|
||||||
By default, in UTF modes, characters with values greater than 128 do not match
|
By default, characters with values greater than 127 do not match any of the
|
||||||
any of the POSIX character classes. However, if the PCRE_UCP option is passed
|
POSIX character classes. However, if the PCRE_UCP option is passed to
|
||||||
to \fBpcre_compile()\fP, some of the classes are changed so that Unicode
|
\fBpcre_compile()\fP, some of the classes are changed so that Unicode character
|
||||||
character properties are used. This is achieved by replacing the POSIX classes
|
properties are used. This is achieved by replacing certain POSIX classes by
|
||||||
by other sequences, as follows:
|
other sequences, as follows:
|
||||||
.sp
|
.sp
|
||||||
[:alnum:] becomes \ep{Xan}
|
[:alnum:] becomes \ep{Xan}
|
||||||
[:alpha:] becomes \ep{L}
|
[:alpha:] becomes \ep{L}
|
||||||
@ -1325,9 +1377,54 @@ by other sequences, as follows:
|
|||||||
[:upper:] becomes \ep{Lu}
|
[:upper:] becomes \ep{Lu}
|
||||||
[:word:] becomes \ep{Xwd}
|
[:word:] becomes \ep{Xwd}
|
||||||
.sp
|
.sp
|
||||||
Negated versions, such as [:^alpha:] use \eP instead of \ep. The other POSIX
|
Negated versions, such as [:^alpha:] use \eP instead of \ep. Three other POSIX
|
||||||
classes are unchanged, and match only characters with code points less than
|
classes are handled specially in UCP mode:
|
||||||
128.
|
.TP 10
|
||||||
|
[:graph:]
|
||||||
|
This matches characters that have glyphs that mark the page when printed. In
|
||||||
|
Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf
|
||||||
|
properties, except for:
|
||||||
|
.sp
|
||||||
|
U+061C Arabic Letter Mark
|
||||||
|
U+180E Mongolian Vowel Separator
|
||||||
|
U+2066 - U+2069 Various "isolate"s
|
||||||
|
.sp
|
||||||
|
.TP 10
|
||||||
|
[:print:]
|
||||||
|
This matches the same characters as [:graph:] plus space characters that are
|
||||||
|
not controls, that is, characters with the Zs property.
|
||||||
|
.TP 10
|
||||||
|
[:punct:]
|
||||||
|
This matches all characters that have the Unicode P (punctuation) property,
|
||||||
|
plus those characters whose code points are less than 128 that have the S
|
||||||
|
(Symbol) property.
|
||||||
|
.P
|
||||||
|
The other POSIX classes are unchanged, and match only characters with code
|
||||||
|
points less than 128.
|
||||||
|
.
|
||||||
|
.
|
||||||
|
.SH "COMPATIBILITY FEATURE FOR WORD BOUNDARIES"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly
|
||||||
|
syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of
|
||||||
|
word". PCRE treats these items as follows:
|
||||||
|
.sp
|
||||||
|
[[:<:]] is converted to \eb(?=\ew)
|
||||||
|
[[:>:]] is converted to \eb(?<=\ew)
|
||||||
|
.sp
|
||||||
|
Only these exact character sequences are recognized. A sequence such as
|
||||||
|
[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is
|
||||||
|
not compatible with Perl. It is provided to help migrations from other
|
||||||
|
environments, and is best not used in any new patterns. Note that \eb matches
|
||||||
|
at the start and the end of a word (see
|
||||||
|
.\" HTML <a href="#smallassertions">
|
||||||
|
.\" </a>
|
||||||
|
"Simple assertions"
|
||||||
|
.\"
|
||||||
|
above), and in a Perl-style pattern the preceding or following character
|
||||||
|
normally shows which is wanted, without the need for the assertions that are
|
||||||
|
used above in order to give exactly the POSIX behaviour.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "VERTICAL BAR"
|
.SH "VERTICAL BAR"
|
||||||
@ -1547,11 +1644,12 @@ conditions,
|
|||||||
.\"
|
.\"
|
||||||
can be made by name as well as by number.
|
can be made by name as well as by number.
|
||||||
.P
|
.P
|
||||||
Names consist of up to 32 alphanumeric characters and underscores. Named
|
Names consist of up to 32 alphanumeric characters and underscores, but must
|
||||||
capturing parentheses are still allocated numbers as well as names, exactly as
|
start with a non-digit. Named capturing parentheses are still allocated numbers
|
||||||
if the names were not present. The PCRE API provides function calls for
|
as well as names, exactly as if the names were not present. The PCRE API
|
||||||
extracting the name-to-number translation table from a compiled pattern. There
|
provides function calls for extracting the name-to-number translation table
|
||||||
is also a convenience function for extracting a captured substring by name.
|
from a compiled pattern. There is also a convenience function for extracting a
|
||||||
|
captured substring by name.
|
||||||
.P
|
.P
|
||||||
By default, a name must be unique within a pattern, but it is possible to relax
|
By default, a name must be unique within a pattern, but it is possible to relax
|
||||||
this constraint by setting the PCRE_DUPNAMES option at compile time. (Duplicate
|
this constraint by setting the PCRE_DUPNAMES option at compile time. (Duplicate
|
||||||
@ -1577,9 +1675,20 @@ for the first (and in this example, the only) subpattern of that name that
|
|||||||
matched. This saves searching to find which numbered subpattern it was.
|
matched. This saves searching to find which numbered subpattern it was.
|
||||||
.P
|
.P
|
||||||
If you make a back reference to a non-unique named subpattern from elsewhere in
|
If you make a back reference to a non-unique named subpattern from elsewhere in
|
||||||
the pattern, the one that corresponds to the first occurrence of the name is
|
the pattern, the subpatterns to which the name refers are checked in the order
|
||||||
used. In the absence of duplicate numbers (see the previous section) this is
|
in which they appear in the overall pattern. The first one that is set is used
|
||||||
the one with the lowest number. If you use a named reference in a condition
|
for the reference. For example, this pattern matches both "foofoo" and
|
||||||
|
"barbar" but not "foobar" or "barfoo":
|
||||||
|
.sp
|
||||||
|
(?:(?<n>foo)|(?<n>bar))\ek<n>
|
||||||
|
.sp
|
||||||
|
.P
|
||||||
|
If you make a subroutine call to a non-unique named subpattern, the one that
|
||||||
|
corresponds to the first occurrence of the name is used. In the absence of
|
||||||
|
duplicate numbers (see the previous section) this is the one with the lowest
|
||||||
|
number.
|
||||||
|
.P
|
||||||
|
If you use a named reference in a condition
|
||||||
test (see the
|
test (see the
|
||||||
.\"
|
.\"
|
||||||
.\" HTML <a href="#conditions">
|
.\" HTML <a href="#conditions">
|
||||||
@ -1599,8 +1708,9 @@ documentation.
|
|||||||
\fBWarning:\fP You cannot use different names to distinguish between two
|
\fBWarning:\fP You cannot use different names to distinguish between two
|
||||||
subpatterns with the same number because PCRE uses only the numbers when
|
subpatterns with the same number because PCRE uses only the numbers when
|
||||||
matching. For this reason, an error is given at compile time if different names
|
matching. For this reason, an error is given at compile time if different names
|
||||||
are given to subpatterns with the same number. However, you can give the same
|
are given to subpatterns with the same number. However, you can always give the
|
||||||
name to subpatterns with the same number, even when PCRE_DUPNAMES is not set.
|
same name to subpatterns with the same number, even when PCRE_DUPNAMES is not
|
||||||
|
set.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH REPETITION
|
.SH REPETITION
|
||||||
@ -2271,12 +2381,7 @@ This makes the fragment independent of the parentheses in the larger pattern.
|
|||||||
.sp
|
.sp
|
||||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||||
subpattern by name. For compatibility with earlier versions of PCRE, which had
|
subpattern by name. For compatibility with earlier versions of PCRE, which had
|
||||||
this facility before Perl, the syntax (?(name)...) is also recognized. However,
|
this facility before Perl, the syntax (?(name)...) is also recognized.
|
||||||
there is a possible ambiguity with this syntax, because subpattern names may
|
|
||||||
consist entirely of digits. PCRE looks first for a named subpattern; if it
|
|
||||||
cannot find one and the name consists entirely of digits, PCRE looks for a
|
|
||||||
subpattern of that number, which must be greater than zero. Using subpattern
|
|
||||||
names that consist entirely of digits is not recommended.
|
|
||||||
.P
|
.P
|
||||||
Rewriting the above example to use a named subpattern gives this:
|
Rewriting the above example to use a named subpattern gives this:
|
||||||
.sp
|
.sp
|
||||||
@ -2698,8 +2803,13 @@ During matching, when PCRE reaches a callout point, the external function is
|
|||||||
called. It is provided with the number of the callout, the position in the
|
called. It is provided with the number of the callout, the position in the
|
||||||
pattern, and, optionally, one item of data originally supplied by the caller of
|
pattern, and, optionally, one item of data originally supplied by the caller of
|
||||||
the matching function. The callout function may cause matching to proceed, to
|
the matching function. The callout function may cause matching to proceed, to
|
||||||
backtrack, or to fail altogether. A complete description of the interface to
|
backtrack, or to fail altogether.
|
||||||
the callout function is given in the
|
.P
|
||||||
|
By default, PCRE implements a number of optimizations at compile time and
|
||||||
|
matching time, and one side-effect is that sometimes callouts are skipped. If
|
||||||
|
you need all possible callouts to happen, you need to set options that disable
|
||||||
|
the relevant optimizations. More details, and a complete description of the
|
||||||
|
interface to the callout function, are given in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcrecallout\fP
|
\fBpcrecallout\fP
|
||||||
.\"
|
.\"
|
||||||
@ -3060,7 +3170,7 @@ example:
|
|||||||
.sp
|
.sp
|
||||||
...(*COMMIT)(*PRUNE)...
|
...(*COMMIT)(*PRUNE)...
|
||||||
.sp
|
.sp
|
||||||
If there is a matching failure to the right, backtracking onto (*PRUNE) cases
|
If there is a matching failure to the right, backtracking onto (*PRUNE) causes
|
||||||
it to be triggered, and its action is taken. There can never be a backtrack
|
it to be triggered, and its action is taken. There can never be a backtrack
|
||||||
onto (*COMMIT).
|
onto (*COMMIT).
|
||||||
.
|
.
|
||||||
@ -3145,6 +3255,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 26 April 2013
|
Last updated: 03 December 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,25 +1,22 @@
|
|||||||
.TH PCREPOSIX 3 "09 January 2012" "PCRE 8.30"
|
.TH PCREPOSIX 3 "09 January 2012" "PCRE 8.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions.
|
PCRE - Perl-compatible regular expressions.
|
||||||
.SH "SYNOPSIS OF POSIX API"
|
.SH "SYNOPSIS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.B #include <pcreposix.h>
|
.B #include <pcreposix.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.nf
|
||||||
.B int regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP,
|
.B int regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP,
|
||||||
.ti +5n
|
.B " int \fIcflags\fP);"
|
||||||
.B int \fIcflags\fP);
|
.sp
|
||||||
.PP
|
|
||||||
.B int regexec(regex_t *\fIpreg\fP, const char *\fIstring\fP,
|
.B int regexec(regex_t *\fIpreg\fP, const char *\fIstring\fP,
|
||||||
.ti +5n
|
.B " size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);"
|
||||||
.B size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);
|
.B " size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,"
|
||||||
.PP
|
.B " char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);"
|
||||||
.B size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,
|
.sp
|
||||||
.ti +5n
|
|
||||||
.B char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);
|
|
||||||
.PP
|
|
||||||
.B void regfree(regex_t *\fIpreg\fP);
|
.B void regfree(regex_t *\fIpreg\fP);
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCREPRECOMPILE 3 "24 June 2012" "PCRE 8.30"
|
.TH PCREPRECOMPILE 3 "12 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "SAVING AND RE-USING PRECOMPILED PCRE PATTERNS"
|
.SH "SAVING AND RE-USING PRECOMPILED PCRE PATTERNS"
|
||||||
@ -90,8 +90,8 @@ study data.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||||
memory, called \fBpcre[16|32]_pattern_to_host_byte_order()\fP if necessary,
|
memory, called \fBpcre[16|32]_pattern_to_host_byte_order()\fP if necessary, you
|
||||||
you pass its pointer to \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP in
|
pass its pointer to \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP in
|
||||||
the usual way.
|
the usual way.
|
||||||
.P
|
.P
|
||||||
However, if you passed a pointer to custom character tables when the pattern
|
However, if you passed a pointer to custom character tables when the pattern
|
||||||
@ -110,15 +110,19 @@ in the
|
|||||||
.\"
|
.\"
|
||||||
documentation.
|
documentation.
|
||||||
.P
|
.P
|
||||||
|
\fBWarning:\fP The tables that \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP use
|
||||||
|
must be the same as those that were used when the pattern was compiled. If this
|
||||||
|
is not the case, the behaviour is undefined.
|
||||||
|
.P
|
||||||
If you did not provide custom character tables when the pattern was compiled,
|
If you did not provide custom character tables when the pattern was compiled,
|
||||||
the pointer in the compiled pattern is NULL, which causes the matching
|
the pointer in the compiled pattern is NULL, which causes the matching
|
||||||
functions to use PCRE's internal tables. Thus, you do not need to take any
|
functions to use PCRE's internal tables. Thus, you do not need to take any
|
||||||
special action at run time in this case.
|
special action at run time in this case.
|
||||||
.P
|
.P
|
||||||
If you saved study data with the compiled pattern, you need to create your own
|
If you saved study data with the compiled pattern, you need to create your own
|
||||||
\fBpcre[16|32]_extra\fP data block and set the \fIstudy_data\fP field to point to the
|
\fBpcre[16|32]_extra\fP data block and set the \fIstudy_data\fP field to point
|
||||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
to the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in
|
||||||
\fIflags\fP field to indicate that study data is present. Then pass the
|
the \fIflags\fP field to indicate that study data is present. Then pass the
|
||||||
\fBpcre[16|32]_extra\fP block to the matching function in the usual way. If the
|
\fBpcre[16|32]_extra\fP block to the matching function in the usual way. If the
|
||||||
pattern was studied for just-in-time optimization, that data cannot be saved,
|
pattern was studied for just-in-time optimization, that data cannot be saved,
|
||||||
and so is lost by a save/restore cycle.
|
and so is lost by a save/restore cycle.
|
||||||
@ -146,6 +150,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 24 June 2012
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2012 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRESYNTAX 3 "26 April 2013" "PCRE 8.33"
|
.TH PCRESYNTAX 3 "12 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions
|
PCRE - Perl-compatible regular expressions
|
||||||
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
|
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
|
||||||
@ -29,9 +29,14 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||||||
\en newline (hex 0A)
|
\en newline (hex 0A)
|
||||||
\er carriage return (hex 0D)
|
\er carriage return (hex 0D)
|
||||||
\et tab (hex 09)
|
\et tab (hex 09)
|
||||||
|
\e0dd character with octal code 0dd
|
||||||
\eddd character with octal code ddd, or backreference
|
\eddd character with octal code ddd, or backreference
|
||||||
|
\eo{ddd..} character with octal code ddd..
|
||||||
\exhh character with hex code hh
|
\exhh character with hex code hh
|
||||||
\ex{hhh..} character with hex code hhh..
|
\ex{hhh..} character with hex code hhh..
|
||||||
|
.sp
|
||||||
|
Note that \e0dd is always an octal code, and that \e8 and \e9 are the literal
|
||||||
|
characters "8" and "9".
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "CHARACTER TYPES"
|
.SH "CHARACTER TYPES"
|
||||||
@ -56,9 +61,11 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||||||
\eW a "non-word" character
|
\eW a "non-word" character
|
||||||
\eX a Unicode extended grapheme cluster
|
\eX a Unicode extended grapheme cluster
|
||||||
.sp
|
.sp
|
||||||
In PCRE, by default, \ed, \eD, \es, \eS, \ew, and \eW recognize only ASCII
|
By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode
|
||||||
characters, even in a UTF mode. However, this can be changed by setting the
|
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
||||||
PCRE_UCP option.
|
happening, \es and \ew may also match characters with code points in the range
|
||||||
|
128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences
|
||||||
|
is changed to use Unicode properties and they match many more characters.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP"
|
.SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP"
|
||||||
@ -115,10 +122,13 @@ PCRE_UCP option.
|
|||||||
.sp
|
.sp
|
||||||
Xan Alphanumeric: union of properties L and N
|
Xan Alphanumeric: union of properties L and N
|
||||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||||
Xsp Perl space: property Z or tab, NL, FF, CR
|
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||||
Xuc Universally-named character: one that can be
|
Xuc Universally-named character: one that can be
|
||||||
represented by a Universal Character Name
|
represented by a Universal Character Name
|
||||||
Xwd Perl word: property Xan or underscore
|
Xwd Perl word: property Xan or underscore
|
||||||
|
.sp
|
||||||
|
Perl and POSIX space are now the same. Perl added VT to its space character set
|
||||||
|
at release 5.18 and PCRE changed at release 8.34.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "SCRIPT NAMES FOR \ep AND \eP"
|
.SH "SCRIPT NAMES FOR \ep AND \eP"
|
||||||
@ -355,6 +365,9 @@ newline-setting options with similar syntax:
|
|||||||
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
||||||
(*UTF) set appropriate UTF mode for the library in use
|
(*UTF) set appropriate UTF mode for the library in use
|
||||||
(*UCP) set PCRE_UCP (use Unicode properties for \ed etc)
|
(*UCP) set PCRE_UCP (use Unicode properties for \ed etc)
|
||||||
|
.sp
|
||||||
|
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||||
|
limits set by the caller of pcre_exec(), not increase them.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"
|
.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"
|
||||||
@ -495,6 +508,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 26 April 2013
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
.TH PCRETEST 1 "26 April 2013" "PCRE 8.33"
|
.TH PCRETEST 1 "12 November 2013" "PCRE 8.34"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcretest - a program for testing Perl-compatible regular expressions.
|
pcretest - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
@ -155,6 +155,10 @@ Output the size of each compiled pattern after it has been compiled. This is
|
|||||||
equivalent to adding \fB/M\fP to each regular expression. The size is given in
|
equivalent to adding \fB/M\fP to each regular expression. The size is given in
|
||||||
bytes for both libraries.
|
bytes for both libraries.
|
||||||
.TP 10
|
.TP 10
|
||||||
|
\fB-O\fP
|
||||||
|
Behave as if each pattern has the \fB/O\fP modifier, that is, disable
|
||||||
|
auto-possessification for all patterns.
|
||||||
|
.TP 10
|
||||||
\fB-o\fP \fIosize\fP
|
\fB-o\fP \fIosize\fP
|
||||||
Set the number of elements in the output vector that is used when calling
|
Set the number of elements in the output vector that is used when calling
|
||||||
\fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP to be \fIosize\fP. The
|
\fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP to be \fIosize\fP. The
|
||||||
@ -216,17 +220,21 @@ contains (*MARK) items there may also be differences, for the same reason. The
|
|||||||
should never be studied (see the \fB/S\fP pattern modifier below).
|
should never be studied (see the \fB/S\fP pattern modifier below).
|
||||||
.TP 10
|
.TP 10
|
||||||
\fB-t\fP
|
\fB-t\fP
|
||||||
Run each compile, study, and match many times with a timer, and output
|
Run each compile, study, and match many times with a timer, and output the
|
||||||
resulting time per compile or match (in milliseconds). Do not set \fB-m\fP with
|
resulting times per compile, study, or match (in milliseconds). Do not set
|
||||||
\fB-t\fP, because you will then get the size output a zillion times, and the
|
\fB-m\fP with \fB-t\fP, because you will then get the size output a zillion
|
||||||
timing will be distorted. You can control the number of iterations that are
|
times, and the timing will be distorted. You can control the number of
|
||||||
used for timing by following \fB-t\fP with a number (as a separate item on the
|
iterations that are used for timing by following \fB-t\fP with a number (as a
|
||||||
command line). For example, "-t 1000" would iterate 1000 times. The default is
|
separate item on the command line). For example, "-t 1000" iterates 1000 times.
|
||||||
to iterate 500000 times.
|
The default is to iterate 500000 times.
|
||||||
.TP 10
|
.TP 10
|
||||||
\fB-tm\fP
|
\fB-tm\fP
|
||||||
This is like \fB-t\fP except that it times only the matching phase, not the
|
This is like \fB-t\fP except that it times only the matching phase, not the
|
||||||
compile or study phases.
|
compile or study phases.
|
||||||
|
.TP 10
|
||||||
|
\fB-T\fP \fB-TM\fP
|
||||||
|
These behave like \fB-t\fP and \fB-tm\fP, but in addition, at the end of a run,
|
||||||
|
the total times for all compiles, studies, and matches are output.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
@ -246,7 +254,7 @@ option states whether or not \fBreadline()\fP will be used.
|
|||||||
.P
|
.P
|
||||||
The program handles any number of sets of input on a single input file. Each
|
The program handles any number of sets of input on a single input file. Each
|
||||||
set starts with a regular expression, and continues with any number of data
|
set starts with a regular expression, and continues with any number of data
|
||||||
lines to be matched against the pattern.
|
lines to be matched against that pattern.
|
||||||
.P
|
.P
|
||||||
Each data line is matched separately and independently. If you want to do
|
Each data line is matched separately and independently. If you want to do
|
||||||
multi-line matches, you have to use the \en escape sequence (or \er or \er\en,
|
multi-line matches, you have to use the \en escape sequence (or \er or \er\en,
|
||||||
@ -320,6 +328,7 @@ sections.
|
|||||||
\fB/M\fP show compiled memory size
|
\fB/M\fP show compiled memory size
|
||||||
\fB/m\fP set PCRE_MULTILINE
|
\fB/m\fP set PCRE_MULTILINE
|
||||||
\fB/N\fP set PCRE_NO_AUTO_CAPTURE
|
\fB/N\fP set PCRE_NO_AUTO_CAPTURE
|
||||||
|
\fB/O\fP set PCRE_NO_AUTO_POSSESS
|
||||||
\fB/P\fP use the POSIX wrapper
|
\fB/P\fP use the POSIX wrapper
|
||||||
\fB/S\fP study the pattern after compilation
|
\fB/S\fP study the pattern after compilation
|
||||||
\fB/s\fP set PCRE_DOTALL
|
\fB/s\fP set PCRE_DOTALL
|
||||||
@ -376,6 +385,7 @@ options that do not correspond to anything in Perl:
|
|||||||
\fB/f\fP PCRE_FIRSTLINE
|
\fB/f\fP PCRE_FIRSTLINE
|
||||||
\fB/J\fP PCRE_DUPNAMES
|
\fB/J\fP PCRE_DUPNAMES
|
||||||
\fB/N\fP PCRE_NO_AUTO_CAPTURE
|
\fB/N\fP PCRE_NO_AUTO_CAPTURE
|
||||||
|
\fB/O\fP PCRE_NO_AUTO_POSSESS
|
||||||
\fB/U\fP PCRE_UNGREEDY
|
\fB/U\fP PCRE_UNGREEDY
|
||||||
\fB/W\fP PCRE_UCP
|
\fB/W\fP PCRE_UCP
|
||||||
\fB/X\fP PCRE_EXTRA
|
\fB/X\fP PCRE_EXTRA
|
||||||
@ -508,8 +518,8 @@ expression has been compiled, and the results used when the expression is
|
|||||||
matched. There are a number of qualifying characters that may follow \fB/S\fP.
|
matched. There are a number of qualifying characters that may follow \fB/S\fP.
|
||||||
They may appear in any order.
|
They may appear in any order.
|
||||||
.P
|
.P
|
||||||
If \fBS\fP is followed by an exclamation mark, \fBpcre[16|32]_study()\fP is called
|
If \fB/S\fP is followed by an exclamation mark, \fBpcre[16|32]_study()\fP is
|
||||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
called with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||||
\fBpcre_extra\fP block, even when studying discovers no useful information.
|
\fBpcre_extra\fP block, even when studying discovers no useful information.
|
||||||
.P
|
.P
|
||||||
If \fB/S\fP is followed by a second S character, it suppresses studying, even
|
If \fB/S\fP is followed by a second S character, it suppresses studying, even
|
||||||
@ -585,6 +595,37 @@ The \fB/+\fP modifier works as described above. All other modifiers are
|
|||||||
ignored.
|
ignored.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SS "Locking out certain modifiers"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
PCRE can be compiled with or without support for certain features such as
|
||||||
|
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
|
||||||
|
into a number of different files that are selected for running depending on
|
||||||
|
which features are available. When updating the tests, it is all too easy to
|
||||||
|
put a new test into the wrong file by mistake; for example, to put a test that
|
||||||
|
requires UTF support into a file that is used when it is not available. To help
|
||||||
|
detect such mistakes as early as possible, there is a facility for locking out
|
||||||
|
specific modifiers. If an input line for \fBpcretest\fP starts with the string
|
||||||
|
"< forbid " the following sequence of characters is taken as a list of
|
||||||
|
forbidden modifiers. For example, in the test files that must not use UTF or
|
||||||
|
Unicode property support, this line appears:
|
||||||
|
.sp
|
||||||
|
< forbid 8W
|
||||||
|
.sp
|
||||||
|
This locks out the /8 and /W modifiers. An immediate error is given if they are
|
||||||
|
subsequently encountered. If the character string contains < but not >, all the
|
||||||
|
multi-character modifiers that begin with < are locked out. Otherwise, such
|
||||||
|
modifiers must be explicitly listed, for example:
|
||||||
|
.sp
|
||||||
|
< forbid <JS><cr>
|
||||||
|
.sp
|
||||||
|
There must be a single space between < and "forbid" for this feature to be
|
||||||
|
recognised. If there is not, the line is interpreted either as a request to
|
||||||
|
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
|
||||||
|
below) or, if there is another < character, as a pattern that uses < as its
|
||||||
|
delimiter.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "DATA LINES"
|
.SH "DATA LINES"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
@ -608,6 +649,7 @@ recognized:
|
|||||||
\ev vertical tab (\ex0b)
|
\ev vertical tab (\ex0b)
|
||||||
\ennn octal character (up to 3 octal digits); always
|
\ennn octal character (up to 3 octal digits); always
|
||||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||||
|
\eo{dd...} octal character (any number of octal digits)
|
||||||
\exhh hexadecimal byte (up to 2 hex digits)
|
\exhh hexadecimal byte (up to 2 hex digits)
|
||||||
\ex{hh...} hexadecimal character (any number of hex digits)
|
\ex{hh...} hexadecimal character (any number of hex digits)
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
@ -1031,10 +1073,9 @@ exact copy of the compiled pattern. If there is additional study data, this
|
|||||||
writing the file, \fBpcretest\fP expects to read a new pattern.
|
writing the file, \fBpcretest\fP expects to read a new pattern.
|
||||||
.P
|
.P
|
||||||
A saved pattern can be reloaded into \fBpcretest\fP by specifying < and a file
|
A saved pattern can be reloaded into \fBpcretest\fP by specifying < and a file
|
||||||
name instead of a pattern. The name of the file must not contain a < character,
|
name instead of a pattern. There must be no space between < and the file name,
|
||||||
as otherwise \fBpcretest\fP will interpret the line as a pattern delimited by <
|
which must not contain a < character, as otherwise \fBpcretest\fP will
|
||||||
characters.
|
interpret the line as a pattern delimited by < characters. For example:
|
||||||
For example:
|
|
||||||
.sp
|
.sp
|
||||||
re> </some/file
|
re> </some/file
|
||||||
Compiled pattern loaded from /some/file
|
Compiled pattern loaded from /some/file
|
||||||
@ -1094,6 +1135,6 @@ Cambridge CB2 3QH, England.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 26 April 2013
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -138,32 +138,35 @@ COMMAND LINE OPTIONS
|
|||||||
compiled. This is equivalent to adding /M to each regular
|
compiled. This is equivalent to adding /M to each regular
|
||||||
expression. The size is given in bytes for both libraries.
|
expression. The size is given in bytes for both libraries.
|
||||||
|
|
||||||
-o osize Set the number of elements in the output vector that is used
|
-O Behave as if each pattern has the /O modifier, that is dis-
|
||||||
when calling pcre[16|32]_exec() or pcre[16|32]_dfa_exec() to
|
able auto-possessification for all patterns.
|
||||||
be osize. The default value is 45, which is enough for 14
|
|
||||||
|
-o osize Set the number of elements in the output vector that is used
|
||||||
|
when calling pcre[16|32]_exec() or pcre[16|32]_dfa_exec() to
|
||||||
|
be osize. The default value is 45, which is enough for 14
|
||||||
capturing subexpressions for pcre[16|32]_exec() or 22 differ-
|
capturing subexpressions for pcre[16|32]_exec() or 22 differ-
|
||||||
ent matches for pcre[16|32]_dfa_exec(). The vector size can
|
ent matches for pcre[16|32]_dfa_exec(). The vector size can
|
||||||
be changed for individual matching calls by including \O in
|
be changed for individual matching calls by including \O in
|
||||||
the data line (see below).
|
the data line (see below).
|
||||||
|
|
||||||
-p Behave as if each pattern has the /P modifier; the POSIX
|
-p Behave as if each pattern has the /P modifier; the POSIX
|
||||||
wrapper API is used to call PCRE. None of the other options
|
wrapper API is used to call PCRE. None of the other options
|
||||||
has any effect when -p is set. This option can be used only
|
has any effect when -p is set. This option can be used only
|
||||||
with the 8-bit library.
|
with the 8-bit library.
|
||||||
|
|
||||||
-q Do not output the version number of pcretest at the start of
|
-q Do not output the version number of pcretest at the start of
|
||||||
execution.
|
execution.
|
||||||
|
|
||||||
-S size On Unix-like systems, set the size of the run-time stack to
|
-S size On Unix-like systems, set the size of the run-time stack to
|
||||||
size megabytes.
|
size megabytes.
|
||||||
|
|
||||||
-s or -s+ Behave as if each pattern has the /S modifier; in other
|
-s or -s+ Behave as if each pattern has the /S modifier; in other
|
||||||
words, force each pattern to be studied. If -s+ is used, all
|
words, force each pattern to be studied. If -s+ is used, all
|
||||||
the JIT compile options are passed to pcre[16|32]_study(),
|
the JIT compile options are passed to pcre[16|32]_study(),
|
||||||
causing just-in-time optimization to be set up if it is
|
causing just-in-time optimization to be set up if it is
|
||||||
available, for both full and partial matching. Specific JIT
|
available, for both full and partial matching. Specific JIT
|
||||||
compile options can be selected by following -s+ with a digit
|
compile options can be selected by following -s+ with a digit
|
||||||
in the range 1 to 7, which selects the JIT compile modes as
|
in the range 1 to 7, which selects the JIT compile modes as
|
||||||
follows:
|
follows:
|
||||||
|
|
||||||
1 normal match only
|
1 normal match only
|
||||||
@ -173,115 +176,119 @@ COMMAND LINE OPTIONS
|
|||||||
6 soft and hard partial match
|
6 soft and hard partial match
|
||||||
7 all three modes (default)
|
7 all three modes (default)
|
||||||
|
|
||||||
If -s++ is used instead of -s+ (with or without a following
|
If -s++ is used instead of -s+ (with or without a following
|
||||||
digit), the text "(JIT)" is added to the first output line
|
digit), the text "(JIT)" is added to the first output line
|
||||||
after a match or no match when JIT-compiled code was actually
|
after a match or no match when JIT-compiled code was actually
|
||||||
used.
|
used.
|
||||||
|
|
||||||
Note that there are pattern options that can override -s,
|
Note that there are pattern options that can override -s,
|
||||||
either specifying no studying at all, or suppressing JIT com-
|
either specifying no studying at all, or suppressing JIT com-
|
||||||
pilation.
|
pilation.
|
||||||
|
|
||||||
If the /I or /D option is present on a pattern (requesting
|
If the /I or /D option is present on a pattern (requesting
|
||||||
output about the compiled pattern), information about the
|
output about the compiled pattern), information about the
|
||||||
result of studying is not included when studying is caused
|
result of studying is not included when studying is caused
|
||||||
only by -s and neither -i nor -d is present on the command
|
only by -s and neither -i nor -d is present on the command
|
||||||
line. This behaviour means that the output from tests that
|
line. This behaviour means that the output from tests that
|
||||||
are run with and without -s should be identical, except when
|
are run with and without -s should be identical, except when
|
||||||
options that output information about the actual running of a
|
options that output information about the actual running of a
|
||||||
match are set.
|
match are set.
|
||||||
|
|
||||||
The -M, -t, and -tm options, which give information about
|
The -M, -t, and -tm options, which give information about
|
||||||
resources used, are likely to produce different output with
|
resources used, are likely to produce different output with
|
||||||
and without -s. Output may also differ if the /C option is
|
and without -s. Output may also differ if the /C option is
|
||||||
present on an individual pattern. This uses callouts to trace
|
present on an individual pattern. This uses callouts to trace
|
||||||
the matching process, and this may be different between
|
the matching process, and this may be different between
|
||||||
studied and non-studied patterns. If the pattern contains
|
studied and non-studied patterns. If the pattern contains
|
||||||
(*MARK) items there may also be differences, for the same
|
(*MARK) items there may also be differences, for the same
|
||||||
reason. The -s command line option can be overridden for spe-
|
reason. The -s command line option can be overridden for spe-
|
||||||
cific patterns that should never be studied (see the /S pat-
|
cific patterns that should never be studied (see the /S pat-
|
||||||
tern modifier below).
|
tern modifier below).
|
||||||
|
|
||||||
-t Run each compile, study, and match many times with a timer,
|
-t Run each compile, study, and match many times with a timer,
|
||||||
and output resulting time per compile or match (in millisec-
|
and output the resulting times per compile, study, or match
|
||||||
onds). Do not set -m with -t, because you will then get the
|
(in milliseconds). Do not set -m with -t, because you will
|
||||||
size output a zillion times, and the timing will be dis-
|
then get the size output a zillion times, and the timing will
|
||||||
torted. You can control the number of iterations that are
|
be distorted. You can control the number of iterations that
|
||||||
used for timing by following -t with a number (as a separate
|
are used for timing by following -t with a number (as a sepa-
|
||||||
item on the command line). For example, "-t 1000" would iter-
|
rate item on the command line). For example, "-t 1000" iter-
|
||||||
ate 1000 times. The default is to iterate 500000 times.
|
ates 1000 times. The default is to iterate 500000 times.
|
||||||
|
|
||||||
-tm This is like -t except that it times only the matching phase,
|
-tm This is like -t except that it times only the matching phase,
|
||||||
not the compile or study phases.
|
not the compile or study phases.
|
||||||
|
|
||||||
|
-T -TM These behave like -t and -tm, but in addition, at the end of
|
||||||
|
a run, the total times for all compiles, studies, and matches
|
||||||
|
are output.
|
||||||
|
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
|
||||||
If pcretest is given two filename arguments, it reads from the first
|
If pcretest is given two filename arguments, it reads from the first
|
||||||
and writes to the second. If it is given only one filename argument, it
|
and writes to the second. If it is given only one filename argument, it
|
||||||
reads from that file and writes to stdout. Otherwise, it reads from
|
reads from that file and writes to stdout. Otherwise, it reads from
|
||||||
stdin and writes to stdout, and prompts for each line of input, using
|
stdin and writes to stdout, and prompts for each line of input, using
|
||||||
"re>" to prompt for regular expressions, and "data>" to prompt for data
|
"re>" to prompt for regular expressions, and "data>" to prompt for data
|
||||||
lines.
|
lines.
|
||||||
|
|
||||||
When pcretest is built, a configuration option can specify that it
|
When pcretest is built, a configuration option can specify that it
|
||||||
should be linked with the libreadline library. When this is done, if
|
should be linked with the libreadline library. When this is done, if
|
||||||
the input is from a terminal, it is read using the readline() function.
|
the input is from a terminal, it is read using the readline() function.
|
||||||
This provides line-editing and history facilities. The output from the
|
This provides line-editing and history facilities. The output from the
|
||||||
-help option states whether or not readline() will be used.
|
-help option states whether or not readline() will be used.
|
||||||
|
|
||||||
The program handles any number of sets of input on a single input file.
|
The program handles any number of sets of input on a single input file.
|
||||||
Each set starts with a regular expression, and continues with any num-
|
Each set starts with a regular expression, and continues with any num-
|
||||||
ber of data lines to be matched against the pattern.
|
ber of data lines to be matched against that pattern.
|
||||||
|
|
||||||
Each data line is matched separately and independently. If you want to
|
Each data line is matched separately and independently. If you want to
|
||||||
do multi-line matches, you have to use the \n escape sequence (or \r or
|
do multi-line matches, you have to use the \n escape sequence (or \r or
|
||||||
\r\n, etc., depending on the newline setting) in a single line of input
|
\r\n, etc., depending on the newline setting) in a single line of input
|
||||||
to encode the newline sequences. There is no limit on the length of
|
to encode the newline sequences. There is no limit on the length of
|
||||||
data lines; the input buffer is automatically extended if it is too
|
data lines; the input buffer is automatically extended if it is too
|
||||||
small.
|
small.
|
||||||
|
|
||||||
An empty line signals the end of the data lines, at which point a new
|
An empty line signals the end of the data lines, at which point a new
|
||||||
regular expression is read. The regular expressions are given enclosed
|
regular expression is read. The regular expressions are given enclosed
|
||||||
in any non-alphanumeric delimiters other than backslash, for example:
|
in any non-alphanumeric delimiters other than backslash, for example:
|
||||||
|
|
||||||
/(a|bc)x+yz/
|
/(a|bc)x+yz/
|
||||||
|
|
||||||
White space before the initial delimiter is ignored. A regular expres-
|
White space before the initial delimiter is ignored. A regular expres-
|
||||||
sion may be continued over several input lines, in which case the new-
|
sion may be continued over several input lines, in which case the new-
|
||||||
line characters are included within it. It is possible to include the
|
line characters are included within it. It is possible to include the
|
||||||
delimiter within the pattern by escaping it, for example
|
delimiter within the pattern by escaping it, for example
|
||||||
|
|
||||||
/abc\/def/
|
/abc\/def/
|
||||||
|
|
||||||
If you do so, the escape and the delimiter form part of the pattern,
|
If you do so, the escape and the delimiter form part of the pattern,
|
||||||
but since delimiters are always non-alphanumeric, this does not affect
|
but since delimiters are always non-alphanumeric, this does not affect
|
||||||
its interpretation. If the terminating delimiter is immediately fol-
|
its interpretation. If the terminating delimiter is immediately fol-
|
||||||
lowed by a backslash, for example,
|
lowed by a backslash, for example,
|
||||||
|
|
||||||
/abc/\
|
/abc/\
|
||||||
|
|
||||||
then a backslash is added to the end of the pattern. This is done to
|
then a backslash is added to the end of the pattern. This is done to
|
||||||
provide a way of testing the error condition that arises if a pattern
|
provide a way of testing the error condition that arises if a pattern
|
||||||
finishes with a backslash, because
|
finishes with a backslash, because
|
||||||
|
|
||||||
/abc\/
|
/abc\/
|
||||||
|
|
||||||
is interpreted as the first line of a pattern that starts with "abc/",
|
is interpreted as the first line of a pattern that starts with "abc/",
|
||||||
causing pcretest to read the next line as a continuation of the regular
|
causing pcretest to read the next line as a continuation of the regular
|
||||||
expression.
|
expression.
|
||||||
|
|
||||||
|
|
||||||
PATTERN MODIFIERS
|
PATTERN MODIFIERS
|
||||||
|
|
||||||
A pattern may be followed by any number of modifiers, which are mostly
|
A pattern may be followed by any number of modifiers, which are mostly
|
||||||
single characters, though some of these can be qualified by further
|
single characters, though some of these can be qualified by further
|
||||||
characters. Following Perl usage, these are referred to below as, for
|
characters. Following Perl usage, these are referred to below as, for
|
||||||
example, "the /i modifier", even though the delimiter of the pattern
|
example, "the /i modifier", even though the delimiter of the pattern
|
||||||
need not always be a slash, and no slash is used when writing modi-
|
need not always be a slash, and no slash is used when writing modi-
|
||||||
fiers. White space may appear between the final pattern delimiter and
|
fiers. White space may appear between the final pattern delimiter and
|
||||||
the first modifier, and between the modifiers themselves. For refer-
|
the first modifier, and between the modifiers themselves. For refer-
|
||||||
ence, here is a complete list of modifiers. They fall into several
|
ence, here is a complete list of modifiers. They fall into several
|
||||||
groups that are described in detail in the following sections.
|
groups that are described in detail in the following sections.
|
||||||
|
|
||||||
/8 set UTF mode
|
/8 set UTF mode
|
||||||
@ -307,6 +314,7 @@ PATTERN MODIFIERS
|
|||||||
/M show compiled memory size
|
/M show compiled memory size
|
||||||
/m set PCRE_MULTILINE
|
/m set PCRE_MULTILINE
|
||||||
/N set PCRE_NO_AUTO_CAPTURE
|
/N set PCRE_NO_AUTO_CAPTURE
|
||||||
|
/O set PCRE_NO_AUTO_POSSESS
|
||||||
/P use the POSIX wrapper
|
/P use the POSIX wrapper
|
||||||
/S study the pattern after compilation
|
/S study the pattern after compilation
|
||||||
/s set PCRE_DOTALL
|
/s set PCRE_DOTALL
|
||||||
@ -331,8 +339,8 @@ PATTERN MODIFIERS
|
|||||||
Perl-compatible modifiers
|
Perl-compatible modifiers
|
||||||
|
|
||||||
The /i, /m, /s, and /x modifiers set the PCRE_CASELESS, PCRE_MULTILINE,
|
The /i, /m, /s, and /x modifiers set the PCRE_CASELESS, PCRE_MULTILINE,
|
||||||
PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
|
PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
|
||||||
pcre[16|32]_compile() is called. These four modifier letters have the
|
pcre[16|32]_compile() is called. These four modifier letters have the
|
||||||
same effect as they do in Perl. For example:
|
same effect as they do in Perl. For example:
|
||||||
|
|
||||||
/caseless/i
|
/caseless/i
|
||||||
@ -340,7 +348,7 @@ PATTERN MODIFIERS
|
|||||||
|
|
||||||
Modifiers for other PCRE options
|
Modifiers for other PCRE options
|
||||||
|
|
||||||
The following table shows additional modifiers for setting PCRE com-
|
The following table shows additional modifiers for setting PCRE com-
|
||||||
pile-time options that do not correspond to anything in Perl:
|
pile-time options that do not correspond to anything in Perl:
|
||||||
|
|
||||||
/8 PCRE_UTF8 ) when using the 8-bit
|
/8 PCRE_UTF8 ) when using the 8-bit
|
||||||
@ -359,6 +367,7 @@ PATTERN MODIFIERS
|
|||||||
/f PCRE_FIRSTLINE
|
/f PCRE_FIRSTLINE
|
||||||
/J PCRE_DUPNAMES
|
/J PCRE_DUPNAMES
|
||||||
/N PCRE_NO_AUTO_CAPTURE
|
/N PCRE_NO_AUTO_CAPTURE
|
||||||
|
/O PCRE_NO_AUTO_POSSESS
|
||||||
/U PCRE_UNGREEDY
|
/U PCRE_UNGREEDY
|
||||||
/W PCRE_UCP
|
/W PCRE_UCP
|
||||||
/X PCRE_EXTRA
|
/X PCRE_EXTRA
|
||||||
@ -372,138 +381,138 @@ PATTERN MODIFIERS
|
|||||||
/<bsr_unicode> PCRE_BSR_UNICODE
|
/<bsr_unicode> PCRE_BSR_UNICODE
|
||||||
/<JS> PCRE_JAVASCRIPT_COMPAT
|
/<JS> PCRE_JAVASCRIPT_COMPAT
|
||||||
|
|
||||||
The modifiers that are enclosed in angle brackets are literal strings
|
The modifiers that are enclosed in angle brackets are literal strings
|
||||||
as shown, including the angle brackets, but the letters within can be
|
as shown, including the angle brackets, but the letters within can be
|
||||||
in either case. This example sets multiline matching with CRLF as the
|
in either case. This example sets multiline matching with CRLF as the
|
||||||
line ending sequence:
|
line ending sequence:
|
||||||
|
|
||||||
/^abc/m<CRLF>
|
/^abc/m<CRLF>
|
||||||
|
|
||||||
As well as turning on the PCRE_UTF8/16/32 option, the /8 modifier
|
As well as turning on the PCRE_UTF8/16/32 option, the /8 modifier
|
||||||
causes all non-printing characters in output strings to be printed
|
causes all non-printing characters in output strings to be printed
|
||||||
using the \x{hh...} notation. Otherwise, those less than 0x100 are out-
|
using the \x{hh...} notation. Otherwise, those less than 0x100 are out-
|
||||||
put in hex without the curly brackets.
|
put in hex without the curly brackets.
|
||||||
|
|
||||||
Full details of the PCRE options are given in the pcreapi documenta-
|
Full details of the PCRE options are given in the pcreapi documenta-
|
||||||
tion.
|
tion.
|
||||||
|
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
|
||||||
Searching for all possible matches within each subject string can be
|
Searching for all possible matches within each subject string can be
|
||||||
requested by the /g or /G modifier. After finding a match, PCRE is
|
requested by the /g or /G modifier. After finding a match, PCRE is
|
||||||
called again to search the remainder of the subject string. The differ-
|
called again to search the remainder of the subject string. The differ-
|
||||||
ence between /g and /G is that the former uses the startoffset argument
|
ence between /g and /G is that the former uses the startoffset argument
|
||||||
to pcre[16|32]_exec() to start searching at a new point within the
|
to pcre[16|32]_exec() to start searching at a new point within the
|
||||||
entire string (which is in effect what Perl does), whereas the latter
|
entire string (which is in effect what Perl does), whereas the latter
|
||||||
passes over a shortened substring. This makes a difference to the
|
passes over a shortened substring. This makes a difference to the
|
||||||
matching process if the pattern begins with a lookbehind assertion
|
matching process if the pattern begins with a lookbehind assertion
|
||||||
(including \b or \B).
|
(including \b or \B).
|
||||||
|
|
||||||
If any call to pcre[16|32]_exec() in a /g or /G sequence matches an
|
If any call to pcre[16|32]_exec() in a /g or /G sequence matches an
|
||||||
empty string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
|
empty string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
|
||||||
PCRE_ANCHORED flags set in order to search for another, non-empty,
|
PCRE_ANCHORED flags set in order to search for another, non-empty,
|
||||||
match at the same point. If this second match fails, the start offset
|
match at the same point. If this second match fails, the start offset
|
||||||
is advanced, and the normal match is retried. This imitates the way
|
is advanced, and the normal match is retried. This imitates the way
|
||||||
Perl handles such cases when using the /g modifier or the split() func-
|
Perl handles such cases when using the /g modifier or the split() func-
|
||||||
tion. Normally, the start offset is advanced by one character, but if
|
tion. Normally, the start offset is advanced by one character, but if
|
||||||
the newline convention recognizes CRLF as a newline, and the current
|
the newline convention recognizes CRLF as a newline, and the current
|
||||||
character is CR followed by LF, an advance of two is used.
|
character is CR followed by LF, an advance of two is used.
|
||||||
|
|
||||||
Other modifiers
|
Other modifiers
|
||||||
|
|
||||||
There are yet more modifiers for controlling the way pcretest operates.
|
There are yet more modifiers for controlling the way pcretest operates.
|
||||||
|
|
||||||
The /+ modifier requests that as well as outputting the substring that
|
The /+ modifier requests that as well as outputting the substring that
|
||||||
matched the entire pattern, pcretest should in addition output the
|
matched the entire pattern, pcretest should in addition output the
|
||||||
remainder of the subject string. This is useful for tests where the
|
remainder of the subject string. This is useful for tests where the
|
||||||
subject contains multiple copies of the same substring. If the + modi-
|
subject contains multiple copies of the same substring. If the + modi-
|
||||||
fier appears twice, the same action is taken for captured substrings.
|
fier appears twice, the same action is taken for captured substrings.
|
||||||
In each case the remainder is output on the following line with a plus
|
In each case the remainder is output on the following line with a plus
|
||||||
character following the capture number. Note that this modifier must
|
character following the capture number. Note that this modifier must
|
||||||
not immediately follow the /S modifier because /S+ and /S++ have other
|
not immediately follow the /S modifier because /S+ and /S++ have other
|
||||||
meanings.
|
meanings.
|
||||||
|
|
||||||
The /= modifier requests that the values of all potential captured
|
The /= modifier requests that the values of all potential captured
|
||||||
parentheses be output after a match. By default, only those up to the
|
parentheses be output after a match. By default, only those up to the
|
||||||
highest one actually used in the match are output (corresponding to the
|
highest one actually used in the match are output (corresponding to the
|
||||||
return code from pcre[16|32]_exec()). Values in the offsets vector cor-
|
return code from pcre[16|32]_exec()). Values in the offsets vector cor-
|
||||||
responding to higher numbers should be set to -1, and these are output
|
responding to higher numbers should be set to -1, and these are output
|
||||||
as "<unset>". This modifier gives a way of checking that this is hap-
|
as "<unset>". This modifier gives a way of checking that this is hap-
|
||||||
pening.
|
pening.
|
||||||
|
|
||||||
The /B modifier is a debugging feature. It requests that pcretest out-
|
The /B modifier is a debugging feature. It requests that pcretest out-
|
||||||
put a representation of the compiled code after compilation. Normally
|
put a representation of the compiled code after compilation. Normally
|
||||||
this information contains length and offset values; however, if /Z is
|
this information contains length and offset values; however, if /Z is
|
||||||
also present, this data is replaced by spaces. This is a special fea-
|
also present, this data is replaced by spaces. This is a special fea-
|
||||||
ture for use in the automatic test scripts; it ensures that the same
|
ture for use in the automatic test scripts; it ensures that the same
|
||||||
output is generated for different internal link sizes.
|
output is generated for different internal link sizes.
|
||||||
|
|
||||||
The /D modifier is a PCRE debugging feature, and is equivalent to /BI,
|
The /D modifier is a PCRE debugging feature, and is equivalent to /BI,
|
||||||
that is, both the /B and the /I modifiers.
|
that is, both the /B and the /I modifiers.
|
||||||
|
|
||||||
The /F modifier causes pcretest to flip the byte order of the 2-byte
|
The /F modifier causes pcretest to flip the byte order of the 2-byte
|
||||||
and 4-byte fields in the compiled pattern. This facility is for testing
|
and 4-byte fields in the compiled pattern. This facility is for testing
|
||||||
the feature in PCRE that allows it to execute patterns that were com-
|
the feature in PCRE that allows it to execute patterns that were com-
|
||||||
piled on a host with a different endianness. This feature is not avail-
|
piled on a host with a different endianness. This feature is not avail-
|
||||||
able when the POSIX interface to PCRE is being used, that is, when the
|
able when the POSIX interface to PCRE is being used, that is, when the
|
||||||
/P pattern modifier is specified. See also the section about saving and
|
/P pattern modifier is specified. See also the section about saving and
|
||||||
reloading compiled patterns below.
|
reloading compiled patterns below.
|
||||||
|
|
||||||
The /I modifier requests that pcretest output information about the
|
The /I modifier requests that pcretest output information about the
|
||||||
compiled pattern (whether it is anchored, has a fixed first character,
|
compiled pattern (whether it is anchored, has a fixed first character,
|
||||||
and so on). It does this by calling pcre[16|32]_fullinfo() after com-
|
and so on). It does this by calling pcre[16|32]_fullinfo() after com-
|
||||||
piling a pattern. If the pattern is studied, the results of that are
|
piling a pattern. If the pattern is studied, the results of that are
|
||||||
also output.
|
also output.
|
||||||
|
|
||||||
The /K modifier requests pcretest to show names from backtracking con-
|
The /K modifier requests pcretest to show names from backtracking con-
|
||||||
trol verbs that are returned from calls to pcre[16|32]_exec(). It
|
trol verbs that are returned from calls to pcre[16|32]_exec(). It
|
||||||
causes pcretest to create a pcre[16|32]_extra block if one has not
|
causes pcretest to create a pcre[16|32]_extra block if one has not
|
||||||
already been created by a call to pcre[16|32]_study(), and to set the
|
already been created by a call to pcre[16|32]_study(), and to set the
|
||||||
PCRE_EXTRA_MARK flag and the mark field within it, every time that
|
PCRE_EXTRA_MARK flag and the mark field within it, every time that
|
||||||
pcre[16|32]_exec() is called. If the variable that the mark field
|
pcre[16|32]_exec() is called. If the variable that the mark field
|
||||||
points to is non-NULL for a match, non-match, or partial match,
|
points to is non-NULL for a match, non-match, or partial match,
|
||||||
pcretest prints the string to which it points. For a match, this is
|
pcretest prints the string to which it points. For a match, this is
|
||||||
shown on a line by itself, tagged with "MK:". For a non-match it is
|
shown on a line by itself, tagged with "MK:". For a non-match it is
|
||||||
added to the message.
|
added to the message.
|
||||||
|
|
||||||
The /L modifier must be followed directly by the name of a locale, for
|
The /L modifier must be followed directly by the name of a locale, for
|
||||||
example,
|
example,
|
||||||
|
|
||||||
/pattern/Lfr_FR
|
/pattern/Lfr_FR
|
||||||
|
|
||||||
For this reason, it must be the last modifier. The given locale is set,
|
For this reason, it must be the last modifier. The given locale is set,
|
||||||
pcre[16|32]_maketables() is called to build a set of character tables
|
pcre[16|32]_maketables() is called to build a set of character tables
|
||||||
for the locale, and this is then passed to pcre[16|32]_compile() when
|
for the locale, and this is then passed to pcre[16|32]_compile() when
|
||||||
compiling the regular expression. Without an /L (or /T) modifier, NULL
|
compiling the regular expression. Without an /L (or /T) modifier, NULL
|
||||||
is passed as the tables pointer; that is, /L applies only to the
|
is passed as the tables pointer; that is, /L applies only to the
|
||||||
expression on which it appears.
|
expression on which it appears.
|
||||||
|
|
||||||
The /M modifier causes the size in bytes of the memory block used to
|
The /M modifier causes the size in bytes of the memory block used to
|
||||||
hold the compiled pattern to be output. This does not include the size
|
hold the compiled pattern to be output. This does not include the size
|
||||||
of the pcre[16|32] block; it is just the actual compiled data. If the
|
of the pcre[16|32] block; it is just the actual compiled data. If the
|
||||||
pattern is successfully studied with the PCRE_STUDY_JIT_COMPILE option,
|
pattern is successfully studied with the PCRE_STUDY_JIT_COMPILE option,
|
||||||
the size of the JIT compiled code is also output.
|
the size of the JIT compiled code is also output.
|
||||||
|
|
||||||
The /S modifier causes pcre[16|32]_study() to be called after the
|
The /S modifier causes pcre[16|32]_study() to be called after the
|
||||||
expression has been compiled, and the results used when the expression
|
expression has been compiled, and the results used when the expression
|
||||||
is matched. There are a number of qualifying characters that may follow
|
is matched. There are a number of qualifying characters that may follow
|
||||||
/S. They may appear in any order.
|
/S. They may appear in any order.
|
||||||
|
|
||||||
If S is followed by an exclamation mark, pcre[16|32]_study() is called
|
If /S is followed by an exclamation mark, pcre[16|32]_study() is called
|
||||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||||
pcre_extra block, even when studying discovers no useful information.
|
pcre_extra block, even when studying discovers no useful information.
|
||||||
|
|
||||||
If /S is followed by a second S character, it suppresses studying, even
|
If /S is followed by a second S character, it suppresses studying, even
|
||||||
if it was requested externally by the -s command line option. This
|
if it was requested externally by the -s command line option. This
|
||||||
makes it possible to specify that certain patterns are always studied,
|
makes it possible to specify that certain patterns are always studied,
|
||||||
and others are never studied, independently of -s. This feature is used
|
and others are never studied, independently of -s. This feature is used
|
||||||
in the test files in a few cases where the output is different when the
|
in the test files in a few cases where the output is different when the
|
||||||
pattern is studied.
|
pattern is studied.
|
||||||
|
|
||||||
If the /S modifier is followed by a + character, the call to
|
If the /S modifier is followed by a + character, the call to
|
||||||
pcre[16|32]_study() is made with all the JIT study options, requesting
|
pcre[16|32]_study() is made with all the JIT study options, requesting
|
||||||
just-in-time optimization support if it is available, for both normal
|
just-in-time optimization support if it is available, for both normal
|
||||||
and partial matching. If you want to restrict the JIT compiling modes,
|
and partial matching. If you want to restrict the JIT compiling modes,
|
||||||
you can follow /S+ with a digit in the range 1 to 7:
|
you can follow /S+ with a digit in the range 1 to 7:
|
||||||
|
|
||||||
1 normal match only
|
1 normal match only
|
||||||
@ -514,40 +523,40 @@ PATTERN MODIFIERS
|
|||||||
7 all three modes (default)
|
7 all three modes (default)
|
||||||
|
|
||||||
If /S++ is used instead of /S+ (with or without a following digit), the
|
If /S++ is used instead of /S+ (with or without a following digit), the
|
||||||
text "(JIT)" is added to the first output line after a match or no
|
text "(JIT)" is added to the first output line after a match or no
|
||||||
match when JIT-compiled code was actually used.
|
match when JIT-compiled code was actually used.
|
||||||
|
|
||||||
Note that there is also an independent /+ modifier; it must not be
|
Note that there is also an independent /+ modifier; it must not be
|
||||||
given immediately after /S or /S+ because this will be misinterpreted.
|
given immediately after /S or /S+ because this will be misinterpreted.
|
||||||
|
|
||||||
If JIT studying is successful, the compiled JIT code will automatically
|
If JIT studying is successful, the compiled JIT code will automatically
|
||||||
be used when pcre[16|32]_exec() is run, except when incompatible run-
|
be used when pcre[16|32]_exec() is run, except when incompatible run-
|
||||||
time options are specified. For more details, see the pcrejit documen-
|
time options are specified. For more details, see the pcrejit documen-
|
||||||
tation. See also the \J escape sequence below for a way of setting the
|
tation. See also the \J escape sequence below for a way of setting the
|
||||||
size of the JIT stack.
|
size of the JIT stack.
|
||||||
|
|
||||||
Finally, if /S is followed by a minus character, JIT compilation is
|
Finally, if /S is followed by a minus character, JIT compilation is
|
||||||
suppressed, even if it was requested externally by the -s command line
|
suppressed, even if it was requested externally by the -s command line
|
||||||
option. This makes it possible to specify that JIT is never to be used
|
option. This makes it possible to specify that JIT is never to be used
|
||||||
for certain patterns.
|
for certain patterns.
|
||||||
|
|
||||||
The /T modifier must be followed by a single digit. It causes a spe-
|
The /T modifier must be followed by a single digit. It causes a spe-
|
||||||
cific set of built-in character tables to be passed to pcre[16|32]_com-
|
cific set of built-in character tables to be passed to pcre[16|32]_com-
|
||||||
pile(). It is used in the standard PCRE tests to check behaviour with
|
pile(). It is used in the standard PCRE tests to check behaviour with
|
||||||
different character tables. The digit specifies the tables as follows:
|
different character tables. The digit specifies the tables as follows:
|
||||||
|
|
||||||
0 the default ASCII tables, as distributed in
|
0 the default ASCII tables, as distributed in
|
||||||
pcre_chartables.c.dist
|
pcre_chartables.c.dist
|
||||||
1 a set of tables defining ISO 8859 characters
|
1 a set of tables defining ISO 8859 characters
|
||||||
|
|
||||||
In table 1, some characters whose codes are greater than 128 are iden-
|
In table 1, some characters whose codes are greater than 128 are iden-
|
||||||
tified as letters, digits, spaces, etc.
|
tified as letters, digits, spaces, etc.
|
||||||
|
|
||||||
Using the POSIX wrapper API
|
Using the POSIX wrapper API
|
||||||
|
|
||||||
The /P modifier causes pcretest to call PCRE via the POSIX wrapper API
|
The /P modifier causes pcretest to call PCRE via the POSIX wrapper API
|
||||||
rather than its native API. This supports only the 8-bit library. When
|
rather than its native API. This supports only the 8-bit library. When
|
||||||
/P is set, the following modifiers set options for the regcomp() func-
|
/P is set, the following modifiers set options for the regcomp() func-
|
||||||
tion:
|
tion:
|
||||||
|
|
||||||
/i REG_ICASE
|
/i REG_ICASE
|
||||||
@ -558,9 +567,40 @@ PATTERN MODIFIERS
|
|||||||
/W REG_UCP ) the POSIX standard
|
/W REG_UCP ) the POSIX standard
|
||||||
/8 REG_UTF8 )
|
/8 REG_UTF8 )
|
||||||
|
|
||||||
The /+ modifier works as described above. All other modifiers are
|
The /+ modifier works as described above. All other modifiers are
|
||||||
ignored.
|
ignored.
|
||||||
|
|
||||||
|
Locking out certain modifiers
|
||||||
|
|
||||||
|
PCRE can be compiled with or without support for certain features such
|
||||||
|
as UTF-8/16/32 or Unicode properties. Accordingly, the standard tests
|
||||||
|
are split up into a number of different files that are selected for
|
||||||
|
running depending on which features are available. When updating the
|
||||||
|
tests, it is all too easy to put a new test into the wrong file by mis-
|
||||||
|
take; for example, to put a test that requires UTF support into a file
|
||||||
|
that is used when it is not available. To help detect such mistakes as
|
||||||
|
early as possible, there is a facility for locking out specific modi-
|
||||||
|
fiers. If an input line for pcretest starts with the string "< forbid "
|
||||||
|
the following sequence of characters is taken as a list of forbidden
|
||||||
|
modifiers. For example, in the test files that must not use UTF or Uni-
|
||||||
|
code property support, this line appears:
|
||||||
|
|
||||||
|
< forbid 8W
|
||||||
|
|
||||||
|
This locks out the /8 and /W modifiers. An immediate error is given if
|
||||||
|
they are subsequently encountered. If the character string contains <
|
||||||
|
but not >, all the multi-character modifiers that begin with < are
|
||||||
|
locked out. Otherwise, such modifiers must be explicitly listed, for
|
||||||
|
example:
|
||||||
|
|
||||||
|
< forbid <JS><cr>
|
||||||
|
|
||||||
|
There must be a single space between < and "forbid" for this feature to
|
||||||
|
be recognised. If there is not, the line is interpreted either as a
|
||||||
|
request to re-load a pre-compiled pattern (see "SAVING AND RELOADING
|
||||||
|
COMPILED PATTERNS" below) or, if there is another < character, as a
|
||||||
|
pattern that uses < as its delimiter.
|
||||||
|
|
||||||
|
|
||||||
DATA LINES
|
DATA LINES
|
||||||
|
|
||||||
@ -583,6 +623,7 @@ DATA LINES
|
|||||||
\v vertical tab (\x0b)
|
\v vertical tab (\x0b)
|
||||||
\nnn octal character (up to 3 octal digits); always
|
\nnn octal character (up to 3 octal digits); always
|
||||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||||
|
\o{dd...} octal character (any number of octal digits)
|
||||||
\xhh hexadecimal byte (up to 2 hex digits)
|
\xhh hexadecimal byte (up to 2 hex digits)
|
||||||
\x{hh...} hexadecimal character (any number of hex digits)
|
\x{hh...} hexadecimal character (any number of hex digits)
|
||||||
\A pass the PCRE_ANCHORED option to pcre[16|32]_exec()
|
\A pass the PCRE_ANCHORED option to pcre[16|32]_exec()
|
||||||
@ -974,50 +1015,51 @@ SAVING AND RELOADING COMPILED PATTERNS
|
|||||||
writing the file, pcretest expects to read a new pattern.
|
writing the file, pcretest expects to read a new pattern.
|
||||||
|
|
||||||
A saved pattern can be reloaded into pcretest by specifying < and a
|
A saved pattern can be reloaded into pcretest by specifying < and a
|
||||||
file name instead of a pattern. The name of the file must not contain a
|
file name instead of a pattern. There must be no space between < and
|
||||||
< character, as otherwise pcretest will interpret the line as a pattern
|
the file name, which must not contain a < character, as otherwise
|
||||||
delimited by < characters. For example:
|
pcretest will interpret the line as a pattern delimited by < charac-
|
||||||
|
ters. For example:
|
||||||
|
|
||||||
re> </some/file
|
re> </some/file
|
||||||
Compiled pattern loaded from /some/file
|
Compiled pattern loaded from /some/file
|
||||||
No study data
|
No study data
|
||||||
|
|
||||||
If the pattern was previously studied with the JIT optimization, the
|
If the pattern was previously studied with the JIT optimization, the
|
||||||
JIT information cannot be saved and restored, and so is lost. When the
|
JIT information cannot be saved and restored, and so is lost. When the
|
||||||
pattern has been loaded, pcretest proceeds to read data lines in the
|
pattern has been loaded, pcretest proceeds to read data lines in the
|
||||||
usual way.
|
usual way.
|
||||||
|
|
||||||
You can copy a file written by pcretest to a different host and reload
|
You can copy a file written by pcretest to a different host and reload
|
||||||
it there, even if the new host has opposite endianness to the one on
|
it there, even if the new host has opposite endianness to the one on
|
||||||
which the pattern was compiled. For example, you can compile on an i86
|
which the pattern was compiled. For example, you can compile on an i86
|
||||||
machine and run on a SPARC machine. When a pattern is reloaded on a
|
machine and run on a SPARC machine. When a pattern is reloaded on a
|
||||||
host with different endianness, the confirmation message is changed to:
|
host with different endianness, the confirmation message is changed to:
|
||||||
|
|
||||||
Compiled pattern (byte-inverted) loaded from /some/file
|
Compiled pattern (byte-inverted) loaded from /some/file
|
||||||
|
|
||||||
The test suite contains some saved pre-compiled patterns with different
|
The test suite contains some saved pre-compiled patterns with different
|
||||||
endianness. These are reloaded using "<!" instead of just "<". This
|
endianness. These are reloaded using "<!" instead of just "<". This
|
||||||
suppresses the "(byte-inverted)" text so that the output is the same on
|
suppresses the "(byte-inverted)" text so that the output is the same on
|
||||||
all hosts. It also forces debugging output once the pattern has been
|
all hosts. It also forces debugging output once the pattern has been
|
||||||
reloaded.
|
reloaded.
|
||||||
|
|
||||||
File names for saving and reloading can be absolute or relative, but
|
File names for saving and reloading can be absolute or relative, but
|
||||||
note that the shell facility of expanding a file name that starts with
|
note that the shell facility of expanding a file name that starts with
|
||||||
a tilde (~) is not available.
|
a tilde (~) is not available.
|
||||||
|
|
||||||
The ability to save and reload files in pcretest is intended for test-
|
The ability to save and reload files in pcretest is intended for test-
|
||||||
ing and experimentation. It is not intended for production use because
|
ing and experimentation. It is not intended for production use because
|
||||||
only a single pattern can be written to a file. Furthermore, there is
|
only a single pattern can be written to a file. Furthermore, there is
|
||||||
no facility for supplying custom character tables for use with a
|
no facility for supplying custom character tables for use with a
|
||||||
reloaded pattern. If the original pattern was compiled with custom
|
reloaded pattern. If the original pattern was compiled with custom
|
||||||
tables, an attempt to match a subject string using a reloaded pattern
|
tables, an attempt to match a subject string using a reloaded pattern
|
||||||
is likely to cause pcretest to crash. Finally, if you attempt to load
|
is likely to cause pcretest to crash. Finally, if you attempt to load
|
||||||
a file that is not in the correct format, the result is undefined.
|
a file that is not in the correct format, the result is undefined.
|
||||||
|
|
||||||
|
|
||||||
SEE ALSO
|
SEE ALSO
|
||||||
|
|
||||||
pcre(3), pcre16(3), pcre32(3), pcreapi(3), pcrecallout(3), pcrejit(3),
|
pcre(3), pcre16(3), pcre32(3), pcreapi(3), pcrecallout(3), pcrejit(3),
|
||||||
pcrematching(3), pcrepartial(3), pcrepattern(3), pcreprecompile(3).
|
pcrematching(3), pcrepartial(3), pcrepattern(3), pcreprecompile(3).
|
||||||
|
|
||||||
|
|
||||||
@ -1030,5 +1072,5 @@ AUTHOR
|
|||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 26 April 2013
|
Last updated: 12 November 2013
|
||||||
Copyright (c) 1997-2013 University of Cambridge.
|
Copyright (c) 1997-2013 University of Cambridge.
|
||||||
|
57
pcre/maria-patches/pcre_stack_guard.diff
Normal file
57
pcre/maria-patches/pcre_stack_guard.diff
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
=== modified file 'pcre/pcre.h.in'
|
||||||
|
--- pcre/pcre.h.in 2013-09-26 14:02:17 +0000
|
||||||
|
+++ pcre/pcre.h.in 2013-10-02 07:58:29 +0000
|
||||||
|
@@ -486,6 +486,7 @@ PCRE_EXP_DECL void (*pcre_free)(void *)
|
||||||
|
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||||
|
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||||
|
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||||
|
+PCRE_EXP_DECL int (*pcre_stack_guard)(void);
|
||||||
|
|
||||||
|
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
|
||||||
|
PCRE_EXP_DECL void (*pcre16_free)(void *);
|
||||||
|
@@ -504,6 +505,7 @@ PCRE_EXP_DECL void pcre_free(void *);
|
||||||
|
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||||
|
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||||
|
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||||
|
+PCRE_EXP_DECL int pcre_stack_guard(void);
|
||||||
|
|
||||||
|
PCRE_EXP_DECL void *pcre16_malloc(size_t);
|
||||||
|
PCRE_EXP_DECL void pcre16_free(void *);
|
||||||
|
|
||||||
|
=== modified file 'pcre/pcre_compile.c'
|
||||||
|
--- pcre/pcre_compile.c 2013-09-26 14:02:17 +0000
|
||||||
|
+++ pcre/pcre_compile.c 2013-10-02 07:58:29 +0000
|
||||||
|
@@ -7107,6 +7107,12 @@ unsigned int orig_bracount;
|
||||||
|
unsigned int max_bracount;
|
||||||
|
branch_chain bc;
|
||||||
|
|
||||||
|
+if (pcre_stack_guard && pcre_stack_guard())
|
||||||
|
+{
|
||||||
|
+ *errorcodeptr= ERR23;
|
||||||
|
+ return FALSE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
bc.outer = bcptr;
|
||||||
|
bc.current_branch = code;
|
||||||
|
|
||||||
|
|
||||||
|
=== modified file 'pcre/pcre_globals.c'
|
||||||
|
--- pcre/pcre_globals.c 2013-09-26 14:02:17 +0000
|
||||||
|
+++ pcre/pcre_globals.c 2013-10-02 07:58:29 +0000
|
||||||
|
@@ -72,6 +72,7 @@ PCRE_EXP_DATA_DEFN void (*PUBL(free))(v
|
||||||
|
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
|
||||||
|
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||||
|
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||||
|
+PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||||
|
|
||||||
|
#elif !defined VPCOMPAT
|
||||||
|
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
|
||||||
|
@@ -79,6 +80,7 @@ PCRE_EXP_DATA_DEFN void (*PUBL(free))(v
|
||||||
|
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
|
||||||
|
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
|
||||||
|
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||||
|
+PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* End of pcre_globals.c */
|
||||||
|
|
@ -150,7 +150,10 @@ with J. */
|
|||||||
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
|
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
|
||||||
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
|
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
|
||||||
|
|
||||||
#define PCRE_DFA_RESTART 0x00020000 /* D */
|
/* This pair use the same bit. */
|
||||||
|
#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */
|
||||||
|
#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */
|
||||||
|
|
||||||
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
|
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
|
||||||
#define PCRE_DUPNAMES 0x00080000 /* C1 */
|
#define PCRE_DUPNAMES 0x00080000 /* C1 */
|
||||||
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
|
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
|
||||||
@ -277,6 +280,7 @@ with J. */
|
|||||||
#define PCRE_INFO_REQUIREDCHARFLAGS 22
|
#define PCRE_INFO_REQUIREDCHARFLAGS 22
|
||||||
#define PCRE_INFO_MATCHLIMIT 23
|
#define PCRE_INFO_MATCHLIMIT 23
|
||||||
#define PCRE_INFO_RECURSIONLIMIT 24
|
#define PCRE_INFO_RECURSIONLIMIT 24
|
||||||
|
#define PCRE_INFO_MATCH_EMPTY 25
|
||||||
|
|
||||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||||
compatible. */
|
compatible. */
|
||||||
@ -294,6 +298,7 @@ compatible. */
|
|||||||
#define PCRE_CONFIG_UTF16 10
|
#define PCRE_CONFIG_UTF16 10
|
||||||
#define PCRE_CONFIG_JITTARGET 11
|
#define PCRE_CONFIG_JITTARGET 11
|
||||||
#define PCRE_CONFIG_UTF32 12
|
#define PCRE_CONFIG_UTF32 12
|
||||||
|
#define PCRE_CONFIG_PARENS_LIMIT 13
|
||||||
|
|
||||||
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
||||||
compatible. */
|
compatible. */
|
||||||
@ -657,6 +662,9 @@ PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
|
|||||||
pcre16_jit_callback, void *);
|
pcre16_jit_callback, void *);
|
||||||
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
|
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
|
||||||
pcre32_jit_callback, void *);
|
pcre32_jit_callback, void *);
|
||||||
|
PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
|
||||||
|
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
|
||||||
|
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
@ -163,7 +163,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||||
|
@ -163,7 +163,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||||
|
3503
pcre/pcre_compile.c
3503
pcre/pcre_compile.c
File diff suppressed because it is too large
Load Diff
@ -161,6 +161,10 @@ switch (what)
|
|||||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_PARENS_LIMIT:
|
||||||
|
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE_CONFIG_MATCH_LIMIT:
|
case PCRE_CONFIG_MATCH_LIMIT:
|
||||||
*((unsigned long int *)where) = MATCH_LIMIT;
|
*((unsigned long int *)where) = MATCH_LIMIT;
|
||||||
break;
|
break;
|
||||||
|
@ -120,7 +120,7 @@ static const pcre_uint8 coptable[] = {
|
|||||||
0, 0, /* \P, \p */
|
0, 0, /* \P, \p */
|
||||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||||
0, /* \X */
|
0, /* \X */
|
||||||
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
|
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
||||||
1, /* Char */
|
1, /* Char */
|
||||||
1, /* Chari */
|
1, /* Chari */
|
||||||
1, /* not */
|
1, /* not */
|
||||||
@ -151,11 +151,14 @@ static const pcre_uint8 coptable[] = {
|
|||||||
/* Character class & ref repeats */
|
/* Character class & ref repeats */
|
||||||
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
|
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
|
||||||
0, 0, /* CRRANGE, CRMINRANGE */
|
0, 0, /* CRRANGE, CRMINRANGE */
|
||||||
|
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
||||||
0, /* CLASS */
|
0, /* CLASS */
|
||||||
0, /* NCLASS */
|
0, /* NCLASS */
|
||||||
0, /* XCLASS - variable length */
|
0, /* XCLASS - variable length */
|
||||||
0, /* REF */
|
0, /* REF */
|
||||||
0, /* REFI */
|
0, /* REFI */
|
||||||
|
0, /* DNREF */
|
||||||
|
0, /* DNREFI */
|
||||||
0, /* RECURSE */
|
0, /* RECURSE */
|
||||||
0, /* CALLOUT */
|
0, /* CALLOUT */
|
||||||
0, /* Alt */
|
0, /* Alt */
|
||||||
@ -171,8 +174,8 @@ static const pcre_uint8 coptable[] = {
|
|||||||
0, 0, /* ONCE, ONCE_NC */
|
0, 0, /* ONCE, ONCE_NC */
|
||||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||||
0, 0, /* CREF, NCREF */
|
0, 0, /* CREF, DNCREF */
|
||||||
0, 0, /* RREF, NRREF */
|
0, 0, /* RREF, DNRREF */
|
||||||
0, /* DEF */
|
0, /* DEF */
|
||||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||||
@ -194,7 +197,7 @@ static const pcre_uint8 poptable[] = {
|
|||||||
1, 1, /* \P, \p */
|
1, 1, /* \P, \p */
|
||||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
|
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
|
||||||
1, /* \X */
|
1, /* \X */
|
||||||
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
|
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
||||||
1, /* Char */
|
1, /* Char */
|
||||||
1, /* Chari */
|
1, /* Chari */
|
||||||
1, /* not */
|
1, /* not */
|
||||||
@ -220,11 +223,14 @@ static const pcre_uint8 poptable[] = {
|
|||||||
/* Character class & ref repeats */
|
/* Character class & ref repeats */
|
||||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
||||||
1, 1, /* CRRANGE, CRMINRANGE */
|
1, 1, /* CRRANGE, CRMINRANGE */
|
||||||
|
1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
||||||
1, /* CLASS */
|
1, /* CLASS */
|
||||||
1, /* NCLASS */
|
1, /* NCLASS */
|
||||||
1, /* XCLASS - variable length */
|
1, /* XCLASS - variable length */
|
||||||
0, /* REF */
|
0, /* REF */
|
||||||
0, /* REFI */
|
0, /* REFI */
|
||||||
|
0, /* DNREF */
|
||||||
|
0, /* DNREFI */
|
||||||
0, /* RECURSE */
|
0, /* RECURSE */
|
||||||
0, /* CALLOUT */
|
0, /* CALLOUT */
|
||||||
0, /* Alt */
|
0, /* Alt */
|
||||||
@ -240,8 +246,8 @@ static const pcre_uint8 poptable[] = {
|
|||||||
0, 0, /* ONCE, ONCE_NC */
|
0, 0, /* ONCE, ONCE_NC */
|
||||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||||
0, 0, /* CREF, NCREF */
|
0, 0, /* CREF, DNCREF */
|
||||||
0, 0, /* RREF, NRREF */
|
0, 0, /* RREF, DNRREF */
|
||||||
0, /* DEF */
|
0, /* DEF */
|
||||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||||
@ -1094,15 +1100,23 @@ for (;;)
|
|||||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
was changed at release 8.34. */
|
||||||
break;
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
switch(c)
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
{
|
||||||
c == CHAR_FF || c == CHAR_CR;
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_WORD:
|
case PT_WORD:
|
||||||
@ -1344,15 +1358,23 @@ for (;;)
|
|||||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
was changed at release 8.34. */
|
||||||
break;
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
switch(c)
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
{
|
||||||
c == CHAR_FF || c == CHAR_CR;
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_WORD:
|
case PT_WORD:
|
||||||
@ -1588,15 +1610,23 @@ for (;;)
|
|||||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
was changed at release 8.34. */
|
||||||
break;
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
switch(c)
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
{
|
||||||
c == CHAR_FF || c == CHAR_CR;
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_WORD:
|
case PT_WORD:
|
||||||
@ -1857,15 +1887,23 @@ for (;;)
|
|||||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
was changed at release 8.34. */
|
||||||
break;
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
switch(c)
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
{
|
||||||
c == CHAR_FF || c == CHAR_CR;
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
OK = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_WORD:
|
case PT_WORD:
|
||||||
@ -2533,31 +2571,65 @@ for (;;)
|
|||||||
{
|
{
|
||||||
case OP_CRSTAR:
|
case OP_CRSTAR:
|
||||||
case OP_CRMINSTAR:
|
case OP_CRMINSTAR:
|
||||||
|
case OP_CRPOSSTAR:
|
||||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||||
if (isinclass) { ADD_NEW(state_offset, 0); }
|
if (isinclass)
|
||||||
|
{
|
||||||
|
if (*ecode == OP_CRPOSSTAR)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(state_offset, 0);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CRPLUS:
|
case OP_CRPLUS:
|
||||||
case OP_CRMINPLUS:
|
case OP_CRMINPLUS:
|
||||||
|
case OP_CRPOSPLUS:
|
||||||
count = current_state->count; /* Already matched */
|
count = current_state->count; /* Already matched */
|
||||||
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
|
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
|
||||||
if (isinclass) { count++; ADD_NEW(state_offset, count); }
|
if (isinclass)
|
||||||
|
{
|
||||||
|
if (count > 0 && *ecode == OP_CRPOSPLUS)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
ADD_NEW(state_offset, count);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CRQUERY:
|
case OP_CRQUERY:
|
||||||
case OP_CRMINQUERY:
|
case OP_CRMINQUERY:
|
||||||
|
case OP_CRPOSQUERY:
|
||||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||||
if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
|
if (isinclass)
|
||||||
|
{
|
||||||
|
if (*ecode == OP_CRPOSQUERY)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
|
ADD_NEW(next_state_offset + 1, 0);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CRRANGE:
|
case OP_CRRANGE:
|
||||||
case OP_CRMINRANGE:
|
case OP_CRMINRANGE:
|
||||||
|
case OP_CRPOSRANGE:
|
||||||
count = current_state->count; /* Already matched */
|
count = current_state->count; /* Already matched */
|
||||||
if (count >= (int)GET2(ecode, 1))
|
if (count >= (int)GET2(ecode, 1))
|
||||||
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||||
if (isinclass)
|
if (isinclass)
|
||||||
{
|
{
|
||||||
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
|
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
|
||||||
|
if (*ecode == OP_CRPOSRANGE)
|
||||||
|
{
|
||||||
|
active_count--; /* Remove non-match possibility */
|
||||||
|
next_active_state--;
|
||||||
|
}
|
||||||
if (++count >= max && max != 0) /* Max 0 => no limit */
|
if (++count >= max && max != 0) /* Max 0 => no limit */
|
||||||
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||||
else
|
else
|
||||||
@ -2657,9 +2729,11 @@ for (;;)
|
|||||||
|
|
||||||
condcode = code[LINK_SIZE+1];
|
condcode = code[LINK_SIZE+1];
|
||||||
|
|
||||||
/* Back reference conditions are not supported */
|
/* Back reference conditions and duplicate named recursion conditions
|
||||||
|
are not supported */
|
||||||
|
|
||||||
if (condcode == OP_CREF || condcode == OP_NCREF)
|
if (condcode == OP_CREF || condcode == OP_DNCREF ||
|
||||||
|
condcode == OP_DNRREF)
|
||||||
return PCRE_ERROR_DFA_UCOND;
|
return PCRE_ERROR_DFA_UCOND;
|
||||||
|
|
||||||
/* The DEFINE condition is always false */
|
/* The DEFINE condition is always false */
|
||||||
@ -2671,7 +2745,7 @@ for (;;)
|
|||||||
which means "test if in any recursion". We can't test for specifically
|
which means "test if in any recursion". We can't test for specifically
|
||||||
recursed groups. */
|
recursed groups. */
|
||||||
|
|
||||||
else if (condcode == OP_RREF || condcode == OP_NRREF)
|
else if (condcode == OP_RREF)
|
||||||
{
|
{
|
||||||
int value = GET2(code, LINK_SIZE + 2);
|
int value = GET2(code, LINK_SIZE + 2);
|
||||||
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
|
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
|
||||||
|
545
pcre/pcre_exec.c
545
pcre/pcre_exec.c
@ -107,8 +107,8 @@ because the offset vector is always a multiple of 3 long. */
|
|||||||
|
|
||||||
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
||||||
|
|
||||||
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
|
static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
|
||||||
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
|
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
|
||||||
|
|
||||||
#ifdef PCRE_DEBUG
|
#ifdef PCRE_DEBUG
|
||||||
/*************************************************
|
/*************************************************
|
||||||
@ -167,7 +167,7 @@ match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
|
|||||||
{
|
{
|
||||||
PCRE_PUCHAR eptr_start = eptr;
|
PCRE_PUCHAR eptr_start = eptr;
|
||||||
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
|
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
|
||||||
#ifdef SUPPORT_UTF
|
#if defined SUPPORT_UTF && defined SUPPORT_UCP
|
||||||
BOOL utf = md->utf;
|
BOOL utf = md->utf;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -195,8 +195,7 @@ ASCII characters. */
|
|||||||
|
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF
|
#if defined SUPPORT_UTF && defined SUPPORT_UCP
|
||||||
#ifdef SUPPORT_UCP
|
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
/* Match characters up to the end of the reference. NOTE: the number of
|
/* Match characters up to the end of the reference. NOTE: the number of
|
||||||
@ -229,7 +228,6 @@ if (caseless)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
|
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
|
||||||
@ -312,7 +310,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
|
|||||||
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
|
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
|
||||||
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
|
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
|
||||||
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
|
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
|
||||||
RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
|
RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
|
||||||
|
|
||||||
/* These versions of the macros use the stack, as normal. There are debugging
|
/* These versions of the macros use the stack, as normal. There are debugging
|
||||||
versions and production versions. Note that the "rw" argument of RMATCH isn't
|
versions and production versions. Note that the "rw" argument of RMATCH isn't
|
||||||
@ -1173,6 +1171,7 @@ for (;;)
|
|||||||
ecode = md->start_code + code_offset;
|
ecode = md->start_code + code_offset;
|
||||||
save_capture_last = md->capture_last;
|
save_capture_last = md->capture_last;
|
||||||
matched_once = TRUE;
|
matched_once = TRUE;
|
||||||
|
mstart = md->start_match_ptr; /* In case \K changed it */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1245,6 +1244,7 @@ for (;;)
|
|||||||
eptr = md->end_match_ptr;
|
eptr = md->end_match_ptr;
|
||||||
ecode = md->start_code + code_offset;
|
ecode = md->start_code + code_offset;
|
||||||
matched_once = TRUE;
|
matched_once = TRUE;
|
||||||
|
mstart = md->start_match_ptr; /* In case \K reset it */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1274,25 +1274,32 @@ for (;;)
|
|||||||
|
|
||||||
/* Control never reaches here. */
|
/* Control never reaches here. */
|
||||||
|
|
||||||
/* Conditional group: compilation checked that there are no more than
|
/* Conditional group: compilation checked that there are no more than two
|
||||||
two branches. If the condition is false, skipping the first branch takes us
|
branches. If the condition is false, skipping the first branch takes us
|
||||||
past the end if there is only one branch, but that's OK because that is
|
past the end of the item if there is only one branch, but that's exactly
|
||||||
exactly what going to the ket would do. */
|
what we want. */
|
||||||
|
|
||||||
case OP_COND:
|
case OP_COND:
|
||||||
case OP_SCOND:
|
case OP_SCOND:
|
||||||
codelink = GET(ecode, 1);
|
|
||||||
|
/* The variable codelink will be added to ecode when the condition is
|
||||||
|
false, to get to the second branch. Setting it to the offset to the ALT
|
||||||
|
or KET, then incrementing ecode achieves this effect. We now have ecode
|
||||||
|
pointing to the condition or callout. */
|
||||||
|
|
||||||
|
codelink = GET(ecode, 1); /* Offset to the second branch */
|
||||||
|
ecode += 1 + LINK_SIZE; /* From this opcode */
|
||||||
|
|
||||||
/* Because of the way auto-callout works during compile, a callout item is
|
/* Because of the way auto-callout works during compile, a callout item is
|
||||||
inserted between OP_COND and an assertion condition. */
|
inserted between OP_COND and an assertion condition. */
|
||||||
|
|
||||||
if (ecode[LINK_SIZE+1] == OP_CALLOUT)
|
if (*ecode == OP_CALLOUT)
|
||||||
{
|
{
|
||||||
if (PUBL(callout) != NULL)
|
if (PUBL(callout) != NULL)
|
||||||
{
|
{
|
||||||
PUBL(callout_block) cb;
|
PUBL(callout_block) cb;
|
||||||
cb.version = 2; /* Version 1 of the callout block */
|
cb.version = 2; /* Version 1 of the callout block */
|
||||||
cb.callout_number = ecode[LINK_SIZE+2];
|
cb.callout_number = ecode[1];
|
||||||
cb.offset_vector = md->offset_vector;
|
cb.offset_vector = md->offset_vector;
|
||||||
#if defined COMPILE_PCRE8
|
#if defined COMPILE_PCRE8
|
||||||
cb.subject = (PCRE_SPTR)md->start_subject;
|
cb.subject = (PCRE_SPTR)md->start_subject;
|
||||||
@ -1304,8 +1311,8 @@ for (;;)
|
|||||||
cb.subject_length = (int)(md->end_subject - md->start_subject);
|
cb.subject_length = (int)(md->end_subject - md->start_subject);
|
||||||
cb.start_match = (int)(mstart - md->start_subject);
|
cb.start_match = (int)(mstart - md->start_subject);
|
||||||
cb.current_position = (int)(eptr - md->start_subject);
|
cb.current_position = (int)(eptr - md->start_subject);
|
||||||
cb.pattern_position = GET(ecode, LINK_SIZE + 3);
|
cb.pattern_position = GET(ecode, 2);
|
||||||
cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
|
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||||
cb.capture_top = offset_top/2;
|
cb.capture_top = offset_top/2;
|
||||||
cb.capture_last = md->capture_last & CAPLMASK;
|
cb.capture_last = md->capture_last & CAPLMASK;
|
||||||
/* Internal change requires this for API compatibility. */
|
/* Internal change requires this for API compatibility. */
|
||||||
@ -1315,207 +1322,119 @@ for (;;)
|
|||||||
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
|
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
|
||||||
if (rrc < 0) RRETURN(rrc);
|
if (rrc < 0) RRETURN(rrc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Advance ecode past the callout, so it now points to the condition. We
|
||||||
|
must adjust codelink so that the value of ecode+codelink is unchanged. */
|
||||||
|
|
||||||
ecode += PRIV(OP_lengths)[OP_CALLOUT];
|
ecode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||||
codelink -= PRIV(OP_lengths)[OP_CALLOUT];
|
codelink -= PRIV(OP_lengths)[OP_CALLOUT];
|
||||||
}
|
}
|
||||||
|
|
||||||
condcode = ecode[LINK_SIZE+1];
|
/* Test the various possible conditions */
|
||||||
|
|
||||||
/* Now see what the actual condition is */
|
condition = FALSE;
|
||||||
|
switch(condcode = *ecode)
|
||||||
if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
|
|
||||||
{
|
{
|
||||||
if (md->recursive == NULL) /* Not recursing => FALSE */
|
case OP_RREF: /* Numbered group recursion test */
|
||||||
|
if (md->recursive != NULL) /* Not recursing => FALSE */
|
||||||
{
|
{
|
||||||
condition = FALSE;
|
unsigned int recno = GET2(ecode, 1); /* Recursion group number*/
|
||||||
ecode += GET(ecode, 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
|
|
||||||
condition = (recno == RREF_ANY || recno == md->recursive->group_num);
|
condition = (recno == RREF_ANY || recno == md->recursive->group_num);
|
||||||
|
|
||||||
/* If the test is for recursion into a specific subpattern, and it is
|
|
||||||
false, but the test was set up by name, scan the table to see if the
|
|
||||||
name refers to any other numbers, and test them. The condition is true
|
|
||||||
if any one is set. */
|
|
||||||
|
|
||||||
if (!condition && condcode == OP_NRREF)
|
|
||||||
{
|
|
||||||
pcre_uchar *slotA = md->name_table;
|
|
||||||
for (i = 0; i < md->name_count; i++)
|
|
||||||
{
|
|
||||||
if (GET2(slotA, 0) == recno) break;
|
|
||||||
slotA += md->name_entry_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Found a name for the number - there can be only one; duplicate
|
|
||||||
names for different numbers are allowed, but not vice versa. First
|
|
||||||
scan down for duplicates. */
|
|
||||||
|
|
||||||
if (i < md->name_count)
|
|
||||||
{
|
|
||||||
pcre_uchar *slotB = slotA;
|
|
||||||
while (slotB > md->name_table)
|
|
||||||
{
|
|
||||||
slotB -= md->name_entry_size;
|
|
||||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
|
||||||
{
|
|
||||||
condition = GET2(slotB, 0) == md->recursive->group_num;
|
|
||||||
if (condition) break;
|
|
||||||
}
|
|
||||||
else break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Scan up for duplicates */
|
|
||||||
|
|
||||||
if (!condition)
|
|
||||||
{
|
|
||||||
slotB = slotA;
|
|
||||||
for (i++; i < md->name_count; i++)
|
|
||||||
{
|
|
||||||
slotB += md->name_entry_size;
|
|
||||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
|
||||||
{
|
|
||||||
condition = GET2(slotB, 0) == md->recursive->group_num;
|
|
||||||
if (condition) break;
|
|
||||||
}
|
|
||||||
else break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Chose branch according to the condition */
|
|
||||||
|
|
||||||
ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
|
|
||||||
}
|
}
|
||||||
}
|
break;
|
||||||
|
|
||||||
else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
|
case OP_DNRREF: /* Duplicate named group recursion test */
|
||||||
{
|
if (md->recursive != NULL)
|
||||||
offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
|
|
||||||
condition = offset < offset_top && md->offset_vector[offset] >= 0;
|
|
||||||
|
|
||||||
/* If the numbered capture is unset, but the reference was by name,
|
|
||||||
scan the table to see if the name refers to any other numbers, and test
|
|
||||||
them. The condition is true if any one is set. This is tediously similar
|
|
||||||
to the code above, but not close enough to try to amalgamate. */
|
|
||||||
|
|
||||||
if (!condition && condcode == OP_NCREF)
|
|
||||||
{
|
{
|
||||||
unsigned int refno = offset >> 1;
|
int count = GET2(ecode, 1 + IMM2_SIZE);
|
||||||
pcre_uchar *slotA = md->name_table;
|
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
|
||||||
|
while (count-- > 0)
|
||||||
for (i = 0; i < md->name_count; i++)
|
|
||||||
{
|
{
|
||||||
if (GET2(slotA, 0) == refno) break;
|
unsigned int recno = GET2(slot, 0);
|
||||||
slotA += md->name_entry_size;
|
condition = recno == md->recursive->group_num;
|
||||||
}
|
if (condition) break;
|
||||||
|
slot += md->name_entry_size;
|
||||||
/* Found a name for the number - there can be only one; duplicate names
|
|
||||||
for different numbers are allowed, but not vice versa. First scan down
|
|
||||||
for duplicates. */
|
|
||||||
|
|
||||||
if (i < md->name_count)
|
|
||||||
{
|
|
||||||
pcre_uchar *slotB = slotA;
|
|
||||||
while (slotB > md->name_table)
|
|
||||||
{
|
|
||||||
slotB -= md->name_entry_size;
|
|
||||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
|
||||||
{
|
|
||||||
offset = GET2(slotB, 0) << 1;
|
|
||||||
condition = offset < offset_top &&
|
|
||||||
md->offset_vector[offset] >= 0;
|
|
||||||
if (condition) break;
|
|
||||||
}
|
|
||||||
else break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Scan up for duplicates */
|
|
||||||
|
|
||||||
if (!condition)
|
|
||||||
{
|
|
||||||
slotB = slotA;
|
|
||||||
for (i++; i < md->name_count; i++)
|
|
||||||
{
|
|
||||||
slotB += md->name_entry_size;
|
|
||||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
|
||||||
{
|
|
||||||
offset = GET2(slotB, 0) << 1;
|
|
||||||
condition = offset < offset_top &&
|
|
||||||
md->offset_vector[offset] >= 0;
|
|
||||||
if (condition) break;
|
|
||||||
}
|
|
||||||
else break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/* Chose branch according to the condition */
|
case OP_CREF: /* Numbered group used test */
|
||||||
|
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||||
|
condition = offset < offset_top && md->offset_vector[offset] >= 0;
|
||||||
|
break;
|
||||||
|
|
||||||
ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
|
case OP_DNCREF: /* Duplicate named group used test */
|
||||||
}
|
{
|
||||||
|
int count = GET2(ecode, 1 + IMM2_SIZE);
|
||||||
|
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
|
||||||
|
while (count-- > 0)
|
||||||
|
{
|
||||||
|
offset = GET2(slot, 0) << 1;
|
||||||
|
condition = offset < offset_top && md->offset_vector[offset] >= 0;
|
||||||
|
if (condition) break;
|
||||||
|
slot += md->name_entry_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
else if (condcode == OP_DEF) /* DEFINE - always false */
|
case OP_DEF: /* DEFINE - always false */
|
||||||
{
|
break;
|
||||||
condition = FALSE;
|
|
||||||
ecode += GET(ecode, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||||
md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
|
md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
||||||
an assertion. */
|
of an assertion. */
|
||||||
|
|
||||||
else
|
default:
|
||||||
{
|
|
||||||
md->match_function_type = MATCH_CONDASSERT;
|
md->match_function_type = MATCH_CONDASSERT;
|
||||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
|
RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
|
||||||
if (rrc == MATCH_MATCH)
|
if (rrc == MATCH_MATCH)
|
||||||
{
|
{
|
||||||
if (md->end_offset_top > offset_top)
|
if (md->end_offset_top > offset_top)
|
||||||
offset_top = md->end_offset_top; /* Captures may have happened */
|
offset_top = md->end_offset_top; /* Captures may have happened */
|
||||||
condition = TRUE;
|
condition = TRUE;
|
||||||
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
|
|
||||||
|
/* Advance ecode past the assertion to the start of the first branch,
|
||||||
|
but adjust it so that the general choosing code below works. */
|
||||||
|
|
||||||
|
ecode += GET(ecode, 1);
|
||||||
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
||||||
|
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
|
/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
|
||||||
assertion; it is therefore treated as NOMATCH. */
|
assertion; it is therefore treated as NOMATCH. Any other return is an
|
||||||
|
error. */
|
||||||
|
|
||||||
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
|
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
|
||||||
{
|
{
|
||||||
RRETURN(rrc); /* Need braces because of following else */
|
RRETURN(rrc); /* Need braces because of following else */
|
||||||
}
|
}
|
||||||
else
|
break;
|
||||||
{
|
|
||||||
condition = FALSE;
|
|
||||||
ecode += codelink;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We are now at the branch that is to be obeyed. As there is only one, can
|
/* Choose branch according to the condition */
|
||||||
use tail recursion to avoid using another stack frame, except when there is
|
|
||||||
unlimited repeat of a possibly empty group. In the latter case, a recursive
|
|
||||||
call to match() is always required, unless the second alternative doesn't
|
|
||||||
exist, in which case we can just plough on. Note that, for compatibility
|
|
||||||
with Perl, the | in a conditional group is NOT treated as creating two
|
|
||||||
alternatives. If a THEN is encountered in the branch, it propagates out to
|
|
||||||
the enclosing alternative (unless nested in a deeper set of alternatives,
|
|
||||||
of course). */
|
|
||||||
|
|
||||||
if (condition || *ecode == OP_ALT)
|
ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
|
||||||
|
|
||||||
|
/* We are now at the branch that is to be obeyed. As there is only one, we
|
||||||
|
can use tail recursion to avoid using another stack frame, except when
|
||||||
|
there is unlimited repeat of a possibly empty group. In the latter case, a
|
||||||
|
recursive call to match() is always required, unless the second alternative
|
||||||
|
doesn't exist, in which case we can just plough on. Note that, for
|
||||||
|
compatibility with Perl, the | in a conditional group is NOT treated as
|
||||||
|
creating two alternatives. If a THEN is encountered in the branch, it
|
||||||
|
propagates out to the enclosing alternative (unless nested in a deeper set
|
||||||
|
of alternatives, of course). */
|
||||||
|
|
||||||
|
if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
|
||||||
{
|
{
|
||||||
if (op != OP_SCOND)
|
if (op != OP_SCOND)
|
||||||
{
|
{
|
||||||
ecode += 1 + LINK_SIZE;
|
|
||||||
goto TAIL_RECURSE;
|
goto TAIL_RECURSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
md->match_function_type = MATCH_CBEGROUP;
|
md->match_function_type = MATCH_CBEGROUP;
|
||||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
|
||||||
RRETURN(rrc);
|
RRETURN(rrc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1523,7 +1442,6 @@ for (;;)
|
|||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ecode += 1 + LINK_SIZE;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2089,6 +2007,7 @@ for (;;)
|
|||||||
|
|
||||||
if (*ecode == OP_KETRPOS)
|
if (*ecode == OP_KETRPOS)
|
||||||
{
|
{
|
||||||
|
md->start_match_ptr = mstart; /* In case \K reset it */
|
||||||
md->end_match_ptr = eptr;
|
md->end_match_ptr = eptr;
|
||||||
md->end_offset_top = offset_top;
|
md->end_offset_top = offset_top;
|
||||||
RRETURN(MATCH_KETRPOS);
|
RRETURN(MATCH_KETRPOS);
|
||||||
@ -2656,19 +2575,24 @@ for (;;)
|
|||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
was changed at release 8.34. */
|
||||||
== (op == OP_NOTPROP))
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
break;
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
switch(c)
|
||||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
{
|
||||||
c == CHAR_FF || c == CHAR_CR)
|
HSPACE_CASES:
|
||||||
== (op == OP_NOTPROP))
|
VSPACE_CASES:
|
||||||
RRETURN(MATCH_NOMATCH);
|
if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
|
||||||
|
(op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_WORD:
|
case PT_WORD:
|
||||||
@ -2742,15 +2666,7 @@ for (;;)
|
|||||||
similar code to character type repeats - written out again for speed.
|
similar code to character type repeats - written out again for speed.
|
||||||
However, if the referenced string is the empty string, always treat
|
However, if the referenced string is the empty string, always treat
|
||||||
it as matched, any number of times (otherwise there could be infinite
|
it as matched, any number of times (otherwise there could be infinite
|
||||||
loops). */
|
loops). If the reference is unset, there are two possibilities:
|
||||||
|
|
||||||
case OP_REF:
|
|
||||||
case OP_REFI:
|
|
||||||
caseless = op == OP_REFI;
|
|
||||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
|
||||||
ecode += 1 + IMM2_SIZE;
|
|
||||||
|
|
||||||
/* If the reference is unset, there are two possibilities:
|
|
||||||
|
|
||||||
(a) In the default, Perl-compatible state, set the length negative;
|
(a) In the default, Perl-compatible state, set the length negative;
|
||||||
this ensures that every attempt at a match fails. We can't just fail
|
this ensures that every attempt at a match fails. We can't just fail
|
||||||
@ -2760,8 +2676,39 @@ for (;;)
|
|||||||
so that the back reference matches an empty string.
|
so that the back reference matches an empty string.
|
||||||
|
|
||||||
Otherwise, set the length to the length of what was matched by the
|
Otherwise, set the length to the length of what was matched by the
|
||||||
referenced subpattern. */
|
referenced subpattern.
|
||||||
|
|
||||||
|
The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
|
||||||
|
or to a non-duplicated named group. For a duplicated named group, OP_DNREF
|
||||||
|
and OP_DNREFI are used. In this case we must scan the list of groups to
|
||||||
|
which the name refers, and use the first one that is set. */
|
||||||
|
|
||||||
|
case OP_DNREF:
|
||||||
|
case OP_DNREFI:
|
||||||
|
caseless = op == OP_DNREFI;
|
||||||
|
{
|
||||||
|
int count = GET2(ecode, 1+IMM2_SIZE);
|
||||||
|
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
|
||||||
|
ecode += 1 + 2*IMM2_SIZE;
|
||||||
|
|
||||||
|
while (count-- > 0)
|
||||||
|
{
|
||||||
|
offset = GET2(slot, 0) << 1;
|
||||||
|
if (offset < offset_top && md->offset_vector[offset] >= 0) break;
|
||||||
|
slot += md->name_entry_size;
|
||||||
|
}
|
||||||
|
if (count < 0)
|
||||||
|
length = (md->jscript_compat)? 0 : -1;
|
||||||
|
else
|
||||||
|
length = md->offset_vector[offset+1] - md->offset_vector[offset];
|
||||||
|
}
|
||||||
|
goto REF_REPEAT;
|
||||||
|
|
||||||
|
case OP_REF:
|
||||||
|
case OP_REFI:
|
||||||
|
caseless = op == OP_REFI;
|
||||||
|
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||||
|
ecode += 1 + IMM2_SIZE;
|
||||||
if (offset >= offset_top || md->offset_vector[offset] < 0)
|
if (offset >= offset_top || md->offset_vector[offset] < 0)
|
||||||
length = (md->jscript_compat)? 0 : -1;
|
length = (md->jscript_compat)? 0 : -1;
|
||||||
else
|
else
|
||||||
@ -2769,6 +2716,7 @@ for (;;)
|
|||||||
|
|
||||||
/* Set up for repetition, or handle the non-repeated case */
|
/* Set up for repetition, or handle the non-repeated case */
|
||||||
|
|
||||||
|
REF_REPEAT:
|
||||||
switch (*ecode)
|
switch (*ecode)
|
||||||
{
|
{
|
||||||
case OP_CRSTAR:
|
case OP_CRSTAR:
|
||||||
@ -2917,8 +2865,12 @@ for (;;)
|
|||||||
case OP_CRMINPLUS:
|
case OP_CRMINPLUS:
|
||||||
case OP_CRQUERY:
|
case OP_CRQUERY:
|
||||||
case OP_CRMINQUERY:
|
case OP_CRMINQUERY:
|
||||||
|
case OP_CRPOSSTAR:
|
||||||
|
case OP_CRPOSPLUS:
|
||||||
|
case OP_CRPOSQUERY:
|
||||||
c = *ecode++ - OP_CRSTAR;
|
c = *ecode++ - OP_CRSTAR;
|
||||||
minimize = (c & 1) != 0;
|
if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
|
||||||
|
else possessive = TRUE;
|
||||||
min = rep_min[c]; /* Pick up values from tables; */
|
min = rep_min[c]; /* Pick up values from tables; */
|
||||||
max = rep_max[c]; /* zero for max => infinity */
|
max = rep_max[c]; /* zero for max => infinity */
|
||||||
if (max == 0) max = INT_MAX;
|
if (max == 0) max = INT_MAX;
|
||||||
@ -2926,7 +2878,9 @@ for (;;)
|
|||||||
|
|
||||||
case OP_CRRANGE:
|
case OP_CRRANGE:
|
||||||
case OP_CRMINRANGE:
|
case OP_CRMINRANGE:
|
||||||
|
case OP_CRPOSRANGE:
|
||||||
minimize = (*ecode == OP_CRMINRANGE);
|
minimize = (*ecode == OP_CRMINRANGE);
|
||||||
|
possessive = (*ecode == OP_CRPOSRANGE);
|
||||||
min = GET2(ecode, 1);
|
min = GET2(ecode, 1);
|
||||||
max = GET2(ecode, 1 + IMM2_SIZE);
|
max = GET2(ecode, 1 + IMM2_SIZE);
|
||||||
if (max == 0) max = INT_MAX;
|
if (max == 0) max = INT_MAX;
|
||||||
@ -3068,6 +3022,9 @@ for (;;)
|
|||||||
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
|
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
|
||||||
eptr += len;
|
eptr += len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (possessive) continue; /* No backtracking */
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
|
||||||
@ -3098,6 +3055,9 @@ for (;;)
|
|||||||
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
|
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
|
||||||
eptr++;
|
eptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (possessive) continue; /* No backtracking */
|
||||||
|
|
||||||
while (eptr >= pp)
|
while (eptr >= pp)
|
||||||
{
|
{
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
|
||||||
@ -3113,9 +3073,10 @@ for (;;)
|
|||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
|
|
||||||
|
|
||||||
/* Match an extended character class. This opcode is encountered only
|
/* Match an extended character class. In the 8-bit library, this opcode is
|
||||||
when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8
|
encountered only when UTF-8 mode mode is supported. In the 16-bit and
|
||||||
mode, because Unicode properties are supported in non-UTF-8 mode. */
|
32-bit libraries, codepoints greater than 255 may be encountered even when
|
||||||
|
UTF is not supported. */
|
||||||
|
|
||||||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||||||
case OP_XCLASS:
|
case OP_XCLASS:
|
||||||
@ -3131,8 +3092,12 @@ for (;;)
|
|||||||
case OP_CRMINPLUS:
|
case OP_CRMINPLUS:
|
||||||
case OP_CRQUERY:
|
case OP_CRQUERY:
|
||||||
case OP_CRMINQUERY:
|
case OP_CRMINQUERY:
|
||||||
|
case OP_CRPOSSTAR:
|
||||||
|
case OP_CRPOSPLUS:
|
||||||
|
case OP_CRPOSQUERY:
|
||||||
c = *ecode++ - OP_CRSTAR;
|
c = *ecode++ - OP_CRSTAR;
|
||||||
minimize = (c & 1) != 0;
|
if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
|
||||||
|
else possessive = TRUE;
|
||||||
min = rep_min[c]; /* Pick up values from tables; */
|
min = rep_min[c]; /* Pick up values from tables; */
|
||||||
max = rep_max[c]; /* zero for max => infinity */
|
max = rep_max[c]; /* zero for max => infinity */
|
||||||
if (max == 0) max = INT_MAX;
|
if (max == 0) max = INT_MAX;
|
||||||
@ -3140,7 +3105,9 @@ for (;;)
|
|||||||
|
|
||||||
case OP_CRRANGE:
|
case OP_CRRANGE:
|
||||||
case OP_CRMINRANGE:
|
case OP_CRMINRANGE:
|
||||||
|
case OP_CRPOSRANGE:
|
||||||
minimize = (*ecode == OP_CRMINRANGE);
|
minimize = (*ecode == OP_CRMINRANGE);
|
||||||
|
possessive = (*ecode == OP_CRPOSRANGE);
|
||||||
min = GET2(ecode, 1);
|
min = GET2(ecode, 1);
|
||||||
max = GET2(ecode, 1 + IMM2_SIZE);
|
max = GET2(ecode, 1 + IMM2_SIZE);
|
||||||
if (max == 0) max = INT_MAX;
|
if (max == 0) max = INT_MAX;
|
||||||
@ -3212,6 +3179,9 @@ for (;;)
|
|||||||
if (!PRIV(xclass)(c, data, utf)) break;
|
if (!PRIV(xclass)(c, data, utf)) break;
|
||||||
eptr += len;
|
eptr += len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (possessive) continue; /* No backtracking */
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
|
||||||
@ -3590,7 +3560,6 @@ for (;;)
|
|||||||
if (fc != cc && foc != cc) break;
|
if (fc != cc && foc != cc) break;
|
||||||
eptr++;
|
eptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
@ -3599,9 +3568,8 @@ for (;;)
|
|||||||
eptr--;
|
eptr--;
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
}
|
}
|
||||||
RRETURN(MATCH_NOMATCH);
|
/* Control never gets here */
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Caseful comparisons (includes all multi-byte characters) */
|
/* Caseful comparisons (includes all multi-byte characters) */
|
||||||
@ -3657,7 +3625,7 @@ for (;;)
|
|||||||
eptr--;
|
eptr--;
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
}
|
}
|
||||||
RRETURN(MATCH_NOMATCH);
|
/* Control never gets here */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
@ -3942,10 +3910,8 @@ for (;;)
|
|||||||
eptr--;
|
eptr--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* Control never gets here */
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Caseful comparisons */
|
/* Caseful comparisons */
|
||||||
@ -4079,8 +4045,7 @@ for (;;)
|
|||||||
eptr--;
|
eptr--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* Control never gets here */
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
@ -4262,22 +4227,11 @@ for (;;)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
for (i = 1; i <= min; i++)
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
{
|
was changed at release 8.34. */
|
||||||
if (eptr >= md->end_subject)
|
|
||||||
{
|
|
||||||
SCHECK_PARTIAL();
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
|
||||||
GETCHARINCTEST(c, eptr);
|
|
||||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
|
||||||
c == CHAR_FF || c == CHAR_CR)
|
|
||||||
== prop_fail_result)
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
for (i = 1; i <= min; i++)
|
for (i = 1; i <= min; i++)
|
||||||
{
|
{
|
||||||
@ -4287,10 +4241,18 @@ for (;;)
|
|||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
GETCHARINCTEST(c, eptr);
|
GETCHARINCTEST(c, eptr);
|
||||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
switch(c)
|
||||||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
|
{
|
||||||
== prop_fail_result)
|
HSPACE_CASES:
|
||||||
RRETURN(MATCH_NOMATCH);
|
VSPACE_CASES:
|
||||||
|
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
|
||||||
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -5010,25 +4972,11 @@ for (;;)
|
|||||||
}
|
}
|
||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
for (fi = min;; fi++)
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
{
|
was changed at release 8.34. */
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
|
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
|
||||||
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
|
||||||
if (eptr >= md->end_subject)
|
|
||||||
{
|
|
||||||
SCHECK_PARTIAL();
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
|
||||||
GETCHARINCTEST(c, eptr);
|
|
||||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
|
||||||
c == CHAR_FF || c == CHAR_CR)
|
|
||||||
== prop_fail_result)
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
|
||||||
/* Control never gets here */
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
for (fi = min;; fi++)
|
for (fi = min;; fi++)
|
||||||
{
|
{
|
||||||
@ -5041,10 +4989,18 @@ for (;;)
|
|||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
GETCHARINCTEST(c, eptr);
|
GETCHARINCTEST(c, eptr);
|
||||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
switch(c)
|
||||||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
|
{
|
||||||
== prop_fail_result)
|
HSPACE_CASES:
|
||||||
RRETURN(MATCH_NOMATCH);
|
VSPACE_CASES:
|
||||||
|
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
|
||||||
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
|
|
||||||
@ -5097,7 +5053,7 @@ for (;;)
|
|||||||
case PT_UCNC:
|
case PT_UCNC:
|
||||||
for (fi = min;; fi++)
|
for (fi = min;; fi++)
|
||||||
{
|
{
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
||||||
if (eptr >= md->end_subject)
|
if (eptr >= md->end_subject)
|
||||||
@ -5528,24 +5484,11 @@ for (;;)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SPACE: /* Perl space */
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
for (i = min; i < max; i++)
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
{
|
was changed at release 8.34. */
|
||||||
int len = 1;
|
|
||||||
if (eptr >= md->end_subject)
|
|
||||||
{
|
|
||||||
SCHECK_PARTIAL();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
GETCHARLENTEST(c, eptr, len);
|
|
||||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
|
||||||
c == CHAR_FF || c == CHAR_CR)
|
|
||||||
== prop_fail_result)
|
|
||||||
break;
|
|
||||||
eptr+= len;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
case PT_PXSPACE: /* POSIX space */
|
case PT_PXSPACE: /* POSIX space */
|
||||||
for (i = min; i < max; i++)
|
for (i = min; i < max; i++)
|
||||||
{
|
{
|
||||||
@ -5556,12 +5499,21 @@ for (;;)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
GETCHARLENTEST(c, eptr, len);
|
GETCHARLENTEST(c, eptr, len);
|
||||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
switch(c)
|
||||||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
|
{
|
||||||
== prop_fail_result)
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
if (prop_fail_result) goto ENDLOOP99; /* Break the loop */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
|
||||||
|
goto ENDLOOP99; /* Break the loop */
|
||||||
|
break;
|
||||||
|
}
|
||||||
eptr+= len;
|
eptr+= len;
|
||||||
}
|
}
|
||||||
|
ENDLOOP99:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_WORD:
|
case PT_WORD:
|
||||||
@ -5642,7 +5594,7 @@ for (;;)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match extended Unicode sequences. We will get here only if the
|
/* Match extended Unicode grapheme clusters. We will get here only if the
|
||||||
support is in the binary; otherwise a compile-time error occurs. */
|
support is in the binary; otherwise a compile-time error occurs. */
|
||||||
|
|
||||||
else if (ctype == OP_EXTUNI)
|
else if (ctype == OP_EXTUNI)
|
||||||
@ -5675,21 +5627,41 @@ for (;;)
|
|||||||
/* eptr is now past the end of the maximum run */
|
/* eptr is now past the end of the maximum run */
|
||||||
|
|
||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE;
|
int lgb, rgb;
|
||||||
|
PCRE_PUCHAR fptr;
|
||||||
|
|
||||||
|
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
|
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
|
|
||||||
|
/* Backtracking over an extended grapheme cluster involves inspecting
|
||||||
|
the previous two characters (if present) to see if a break is
|
||||||
|
permitted between them. */
|
||||||
|
|
||||||
eptr--;
|
eptr--;
|
||||||
for (;;) /* Move back over one extended */
|
if (!utf) c = *eptr; else
|
||||||
{
|
{
|
||||||
if (!utf) c = *eptr; else
|
BACKCHAR(eptr);
|
||||||
|
GETCHAR(c, eptr);
|
||||||
|
}
|
||||||
|
rgb = UCD_GRAPHBREAK(c);
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||||
|
fptr = eptr - 1;
|
||||||
|
if (!utf) c = *fptr; else
|
||||||
{
|
{
|
||||||
BACKCHAR(eptr);
|
BACKCHAR(fptr);
|
||||||
GETCHAR(c, eptr);
|
GETCHAR(c, fptr);
|
||||||
}
|
}
|
||||||
if (UCD_CATEGORY(c) != ucp_M) break;
|
lgb = UCD_GRAPHBREAK(c);
|
||||||
eptr--;
|
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||||
|
eptr = fptr;
|
||||||
|
rgb = lgb;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -6211,11 +6183,8 @@ for (;;)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get here if we can't make it match with any permitted repetitions */
|
/* Control never gets here */
|
||||||
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
|
||||||
|
|
||||||
/* There's been some horrible disaster. Arrival here can only mean there is
|
/* There's been some horrible disaster. Arrival here can only mean there is
|
||||||
something seriously wrong in the code above or the OP_xxx definitions. */
|
something seriously wrong in the code above or the OP_xxx definitions. */
|
||||||
@ -6249,15 +6218,15 @@ switch (frame->Xwhere)
|
|||||||
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
|
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
|
||||||
LBL(65) LBL(66)
|
LBL(65) LBL(66)
|
||||||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||||||
LBL(21)
|
LBL(20) LBL(21)
|
||||||
#endif
|
#endif
|
||||||
#ifdef SUPPORT_UTF
|
#ifdef SUPPORT_UTF
|
||||||
LBL(16) LBL(18) LBL(20)
|
LBL(16) LBL(18)
|
||||||
LBL(22) LBL(23) LBL(28) LBL(30)
|
LBL(22) LBL(23) LBL(28) LBL(30)
|
||||||
LBL(32) LBL(34) LBL(42) LBL(46)
|
LBL(32) LBL(34) LBL(42) LBL(46)
|
||||||
#ifdef SUPPORT_UCP
|
#ifdef SUPPORT_UCP
|
||||||
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
|
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
|
||||||
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
|
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
|
||||||
#endif /* SUPPORT_UCP */
|
#endif /* SUPPORT_UCP */
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UTF */
|
||||||
default:
|
default:
|
||||||
@ -6410,7 +6379,7 @@ const pcre_uint8 *start_bits = NULL;
|
|||||||
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
|
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
|
||||||
PCRE_PUCHAR end_subject;
|
PCRE_PUCHAR end_subject;
|
||||||
PCRE_PUCHAR start_partial = NULL;
|
PCRE_PUCHAR start_partial = NULL;
|
||||||
PCRE_PUCHAR match_partial;
|
PCRE_PUCHAR match_partial = NULL;
|
||||||
PCRE_PUCHAR req_char_ptr = start_match - 1;
|
PCRE_PUCHAR req_char_ptr = start_match - 1;
|
||||||
|
|
||||||
const pcre_study_data *study;
|
const pcre_study_data *study;
|
||||||
@ -7178,7 +7147,7 @@ if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
|
|||||||
|
|
||||||
/* Handle partial matches - disable any mark data */
|
/* Handle partial matches - disable any mark data */
|
||||||
|
|
||||||
if (start_partial != NULL)
|
if (match_partial != NULL)
|
||||||
{
|
{
|
||||||
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
|
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
|
||||||
md->mark = NULL;
|
md->mark = NULL;
|
||||||
|
@ -232,6 +232,10 @@ switch (what)
|
|||||||
*((pcre_uint32 *)where) = re->limit_recursion;
|
*((pcre_uint32 *)where) = re->limit_recursion;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_MATCH_EMPTY:
|
||||||
|
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
|
||||||
|
break;
|
||||||
|
|
||||||
default: return PCRE_ERROR_BADOPTION;
|
default: return PCRE_ERROR_BADOPTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1149,6 +1149,7 @@ compatibility. */
|
|||||||
#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
|
#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
|
||||||
#define PCRE_MLSET 0x00002000 /* match limit set by regex */
|
#define PCRE_MLSET 0x00002000 /* match limit set by regex */
|
||||||
#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
|
#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
|
||||||
|
#define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
|
||||||
|
|
||||||
#if defined COMPILE_PCRE8
|
#if defined COMPILE_PCRE8
|
||||||
#define PCRE_MODE PCRE_MODE8
|
#define PCRE_MODE PCRE_MODE8
|
||||||
@ -1173,7 +1174,8 @@ time, run time, or study time, respectively. */
|
|||||||
#define PUBLIC_COMPILE_OPTIONS \
|
#define PUBLIC_COMPILE_OPTIONS \
|
||||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESS| \
|
||||||
|
PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||||
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
|
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
|
||||||
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
|
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
|
||||||
|
|
||||||
@ -1531,22 +1533,25 @@ a positive value. */
|
|||||||
#define STRING_xdigit "xdigit"
|
#define STRING_xdigit "xdigit"
|
||||||
|
|
||||||
#define STRING_DEFINE "DEFINE"
|
#define STRING_DEFINE "DEFINE"
|
||||||
|
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
||||||
|
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
||||||
|
|
||||||
#define STRING_CR_RIGHTPAR "CR)"
|
#define STRING_CR_RIGHTPAR "CR)"
|
||||||
#define STRING_LF_RIGHTPAR "LF)"
|
#define STRING_LF_RIGHTPAR "LF)"
|
||||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||||
|
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||||
|
|
||||||
#else /* SUPPORT_UTF */
|
#else /* SUPPORT_UTF */
|
||||||
|
|
||||||
@ -1794,22 +1799,25 @@ only. */
|
|||||||
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
|
||||||
|
|
||||||
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
|
||||||
|
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||||
|
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||||
|
|
||||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||||
|
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||||
|
|
||||||
#endif /* SUPPORT_UTF */
|
#endif /* SUPPORT_UTF */
|
||||||
|
|
||||||
@ -1851,6 +1859,17 @@ only. */
|
|||||||
#define PT_WORD 8 /* Word - L plus N plus underscore */
|
#define PT_WORD 8 /* Word - L plus N plus underscore */
|
||||||
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
||||||
#define PT_UCNC 10 /* Universal Character nameable character */
|
#define PT_UCNC 10 /* Universal Character nameable character */
|
||||||
|
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
|
||||||
|
|
||||||
|
/* The following special properties are used only in XCLASS items, when POSIX
|
||||||
|
classes are specified and PCRE_UCP is set - in other words, for Unicode
|
||||||
|
handling of these classes. They are not available via the \p or \P escapes like
|
||||||
|
those in the above list, and so they do not take part in the autopossessifying
|
||||||
|
table. */
|
||||||
|
|
||||||
|
#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */
|
||||||
|
#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
|
||||||
|
#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
|
||||||
|
|
||||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||||
contain characters with values greater than 255. */
|
contain characters with values greater than 255. */
|
||||||
@ -1865,9 +1884,9 @@ contain characters with values greater than 255. */
|
|||||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||||
|
|
||||||
/* These are escaped items that aren't just an encoding of a particular data
|
/* These are escaped items that aren't just an encoding of a particular data
|
||||||
value such as \n. They must have non-zero values, as check_escape() returns
|
value such as \n. They must have non-zero values, as check_escape() returns 0
|
||||||
0 for a data character. Also, they must appear in the same order as in the opcode
|
for a data character. Also, they must appear in the same order as in the
|
||||||
definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
|
opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
|
||||||
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
|
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
|
||||||
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
|
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
|
||||||
non-DOTALL mode, "." behaves like \N.
|
non-DOTALL mode, "." behaves like \N.
|
||||||
@ -1890,12 +1909,31 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
|||||||
ESC_E, ESC_Q, ESC_g, ESC_k,
|
ESC_E, ESC_Q, ESC_g, ESC_k,
|
||||||
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
|
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
|
||||||
|
|
||||||
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
|
||||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
|
||||||
|
|
||||||
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
|
/********************** Opcode definitions ******************/
|
||||||
that follow must also be updated to match. There are also tables called
|
|
||||||
"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
|
/****** NOTE NOTE NOTE ******
|
||||||
|
|
||||||
|
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
||||||
|
order to the list of escapes immediately above. Furthermore, values up to
|
||||||
|
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
||||||
|
pcre_compile.c
|
||||||
|
|
||||||
|
Whenever this list is updated, the two macro definitions that follow must be
|
||||||
|
updated to match. The possessification table called "opcode_possessify" in
|
||||||
|
pcre_compile.c must also be updated, and also the tables called "coptable"
|
||||||
|
and "poptable" in pcre_dfa_exec.c.
|
||||||
|
|
||||||
|
****** NOTE NOTE NOTE ******/
|
||||||
|
|
||||||
|
|
||||||
|
/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive,
|
||||||
|
are used in a table for deciding whether a repeated character type can be
|
||||||
|
auto-possessified. */
|
||||||
|
|
||||||
|
#define FIRST_AUTOTAB_OP OP_NOT_DIGIT
|
||||||
|
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI
|
||||||
|
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
OP_END, /* 0 End of pattern */
|
OP_END, /* 0 End of pattern */
|
||||||
@ -1928,10 +1966,15 @@ enum {
|
|||||||
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
|
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
|
||||||
OP_EOD, /* 24 End of data (\z) */
|
OP_EOD, /* 24 End of data (\z) */
|
||||||
|
|
||||||
OP_CIRC, /* 25 Start of line - not multiline */
|
/* Line end assertions */
|
||||||
OP_CIRCM, /* 26 Start of line - multiline */
|
|
||||||
OP_DOLL, /* 27 End of line - not multiline */
|
OP_DOLL, /* 25 End of line - not multiline */
|
||||||
OP_DOLLM, /* 28 End of line - multiline */
|
OP_DOLLM, /* 26 End of line - multiline */
|
||||||
|
OP_CIRC, /* 27 Start of line - not multiline */
|
||||||
|
OP_CIRCM, /* 28 Start of line - multiline */
|
||||||
|
|
||||||
|
/* Single characters; caseful must precede the caseless ones */
|
||||||
|
|
||||||
OP_CHAR, /* 29 Match one character, casefully */
|
OP_CHAR, /* 29 Match one character, casefully */
|
||||||
OP_CHARI, /* 30 Match one character, caselessly */
|
OP_CHARI, /* 30 Match one character, caselessly */
|
||||||
OP_NOT, /* 31 Match one character, not the given one, casefully */
|
OP_NOT, /* 31 Match one character, not the given one, casefully */
|
||||||
@ -1940,7 +1983,7 @@ enum {
|
|||||||
/* The following sets of 13 opcodes must always be kept in step because
|
/* The following sets of 13 opcodes must always be kept in step because
|
||||||
the offset from the first one is used to generate the others. */
|
the offset from the first one is used to generate the others. */
|
||||||
|
|
||||||
/**** Single characters, caseful, must precede the caseless ones ****/
|
/* Repeated characters; caseful must precede the caseless ones */
|
||||||
|
|
||||||
OP_STAR, /* 33 The maximizing and minimizing versions of */
|
OP_STAR, /* 33 The maximizing and minimizing versions of */
|
||||||
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
|
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
|
||||||
@ -1958,7 +2001,7 @@ enum {
|
|||||||
OP_POSQUERY, /* 44 Posesssified query, caseful */
|
OP_POSQUERY, /* 44 Posesssified query, caseful */
|
||||||
OP_POSUPTO, /* 45 Possessified upto, caseful */
|
OP_POSUPTO, /* 45 Possessified upto, caseful */
|
||||||
|
|
||||||
/**** Single characters, caseless, must follow the caseful ones */
|
/* Repeated characters; caseless must follow the caseful ones */
|
||||||
|
|
||||||
OP_STARI, /* 46 */
|
OP_STARI, /* 46 */
|
||||||
OP_MINSTARI, /* 47 */
|
OP_MINSTARI, /* 47 */
|
||||||
@ -1976,8 +2019,8 @@ enum {
|
|||||||
OP_POSQUERYI, /* 57 Posesssified query, caseless */
|
OP_POSQUERYI, /* 57 Posesssified query, caseless */
|
||||||
OP_POSUPTOI, /* 58 Possessified upto, caseless */
|
OP_POSUPTOI, /* 58 Possessified upto, caseless */
|
||||||
|
|
||||||
/**** The negated ones must follow the non-negated ones, and match them ****/
|
/* The negated ones must follow the non-negated ones, and match them */
|
||||||
/**** Negated single character, caseful; must precede the caseless ones ****/
|
/* Negated repeated character, caseful; must precede the caseless ones */
|
||||||
|
|
||||||
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
|
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
|
||||||
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
|
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
|
||||||
@ -1995,7 +2038,7 @@ enum {
|
|||||||
OP_NOTPOSQUERY, /* 70 */
|
OP_NOTPOSQUERY, /* 70 */
|
||||||
OP_NOTPOSUPTO, /* 71 */
|
OP_NOTPOSUPTO, /* 71 */
|
||||||
|
|
||||||
/**** Negated single character, caseless; must follow the caseful ones ****/
|
/* Negated repeated character, caseless; must follow the caseful ones */
|
||||||
|
|
||||||
OP_NOTSTARI, /* 72 */
|
OP_NOTSTARI, /* 72 */
|
||||||
OP_NOTMINSTARI, /* 73 */
|
OP_NOTMINSTARI, /* 73 */
|
||||||
@ -2013,7 +2056,7 @@ enum {
|
|||||||
OP_NOTPOSQUERYI, /* 83 */
|
OP_NOTPOSQUERYI, /* 83 */
|
||||||
OP_NOTPOSUPTOI, /* 84 */
|
OP_NOTPOSUPTOI, /* 84 */
|
||||||
|
|
||||||
/**** Character types ****/
|
/* Character types */
|
||||||
|
|
||||||
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
|
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
|
||||||
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
|
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
|
||||||
@ -2044,89 +2087,96 @@ enum {
|
|||||||
OP_CRRANGE, /* 104 These are different to the three sets above. */
|
OP_CRRANGE, /* 104 These are different to the three sets above. */
|
||||||
OP_CRMINRANGE, /* 105 */
|
OP_CRMINRANGE, /* 105 */
|
||||||
|
|
||||||
|
OP_CRPOSSTAR, /* 106 Possessified versions */
|
||||||
|
OP_CRPOSPLUS, /* 107 */
|
||||||
|
OP_CRPOSQUERY, /* 108 */
|
||||||
|
OP_CRPOSRANGE, /* 109 */
|
||||||
|
|
||||||
/* End of quantifier opcodes */
|
/* End of quantifier opcodes */
|
||||||
|
|
||||||
OP_CLASS, /* 106 Match a character class, chars < 256 only */
|
OP_CLASS, /* 110 Match a character class, chars < 256 only */
|
||||||
OP_NCLASS, /* 107 Same, but the bitmap was created from a negative
|
OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
|
||||||
class - the difference is relevant only when a
|
class - the difference is relevant only when a
|
||||||
character > 255 is encountered. */
|
character > 255 is encountered. */
|
||||||
OP_XCLASS, /* 108 Extended class for handling > 255 chars within the
|
OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
|
||||||
class. This does both positive and negative. */
|
class. This does both positive and negative. */
|
||||||
OP_REF, /* 109 Match a back reference, casefully */
|
OP_REF, /* 113 Match a back reference, casefully */
|
||||||
OP_REFI, /* 110 Match a back reference, caselessly */
|
OP_REFI, /* 114 Match a back reference, caselessly */
|
||||||
OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */
|
OP_DNREF, /* 115 Match a duplicate name backref, casefully */
|
||||||
OP_CALLOUT, /* 112 Call out to external function if provided */
|
OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */
|
||||||
|
OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */
|
||||||
|
OP_CALLOUT, /* 118 Call out to external function if provided */
|
||||||
|
|
||||||
OP_ALT, /* 113 Start of alternation */
|
OP_ALT, /* 119 Start of alternation */
|
||||||
OP_KET, /* 114 End of group that doesn't have an unbounded repeat */
|
OP_KET, /* 120 End of group that doesn't have an unbounded repeat */
|
||||||
OP_KETRMAX, /* 115 These two must remain together and in this */
|
OP_KETRMAX, /* 121 These two must remain together and in this */
|
||||||
OP_KETRMIN, /* 116 order. They are for groups the repeat for ever. */
|
OP_KETRMIN, /* 122 order. They are for groups the repeat for ever. */
|
||||||
OP_KETRPOS, /* 117 Possessive unlimited repeat. */
|
OP_KETRPOS, /* 123 Possessive unlimited repeat. */
|
||||||
|
|
||||||
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
|
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
|
||||||
asserts must remain in order. */
|
asserts must remain in order. */
|
||||||
|
|
||||||
OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */
|
OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */
|
||||||
OP_ASSERT, /* 119 Positive lookahead */
|
OP_ASSERT, /* 125 Positive lookahead */
|
||||||
OP_ASSERT_NOT, /* 120 Negative lookahead */
|
OP_ASSERT_NOT, /* 126 Negative lookahead */
|
||||||
OP_ASSERTBACK, /* 121 Positive lookbehind */
|
OP_ASSERTBACK, /* 127 Positive lookbehind */
|
||||||
OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
|
OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */
|
||||||
|
|
||||||
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
|
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
|
||||||
after the assertions, with ONCE first, as there's a test for >= ONCE for a
|
after the assertions, with ONCE first, as there's a test for >= ONCE for a
|
||||||
subpattern that isn't an assertion. The POS versions must immediately follow
|
subpattern that isn't an assertion. The POS versions must immediately follow
|
||||||
the non-POS versions in each case. */
|
the non-POS versions in each case. */
|
||||||
|
|
||||||
OP_ONCE, /* 123 Atomic group, contains captures */
|
OP_ONCE, /* 129 Atomic group, contains captures */
|
||||||
OP_ONCE_NC, /* 124 Atomic group containing no captures */
|
OP_ONCE_NC, /* 130 Atomic group containing no captures */
|
||||||
OP_BRA, /* 125 Start of non-capturing bracket */
|
OP_BRA, /* 131 Start of non-capturing bracket */
|
||||||
OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */
|
OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
|
||||||
OP_CBRA, /* 127 Start of capturing bracket */
|
OP_CBRA, /* 133 Start of capturing bracket */
|
||||||
OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
|
OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
|
||||||
OP_COND, /* 129 Conditional group */
|
OP_COND, /* 135 Conditional group */
|
||||||
|
|
||||||
/* These five must follow the previous five, in the same order. There's a
|
/* These five must follow the previous five, in the same order. There's a
|
||||||
check for >= SBRA to distinguish the two sets. */
|
check for >= SBRA to distinguish the two sets. */
|
||||||
|
|
||||||
OP_SBRA, /* 130 Start of non-capturing bracket, check empty */
|
OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
|
||||||
OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */
|
OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
|
||||||
OP_SCBRA, /* 132 Start of capturing bracket, check empty */
|
OP_SCBRA, /* 138 Start of capturing bracket, check empty */
|
||||||
OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
|
OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
|
||||||
OP_SCOND, /* 134 Conditional group, check empty */
|
OP_SCOND, /* 140 Conditional group, check empty */
|
||||||
|
|
||||||
/* The next two pairs must (respectively) be kept together. */
|
/* The next two pairs must (respectively) be kept together. */
|
||||||
|
|
||||||
OP_CREF, /* 135 Used to hold a capture number as condition */
|
OP_CREF, /* 141 Used to hold a capture number as condition */
|
||||||
OP_NCREF, /* 136 Same, but generated by a name reference*/
|
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
|
||||||
OP_RREF, /* 137 Used to hold a recursion number as condition */
|
OP_RREF, /* 143 Used to hold a recursion number as condition */
|
||||||
OP_NRREF, /* 138 Same, but generated by a name reference*/
|
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
|
||||||
OP_DEF, /* 139 The DEFINE condition */
|
OP_DEF, /* 145 The DEFINE condition */
|
||||||
|
|
||||||
OP_BRAZERO, /* 140 These two must remain together and in this */
|
OP_BRAZERO, /* 146 These two must remain together and in this */
|
||||||
OP_BRAMINZERO, /* 141 order. */
|
OP_BRAMINZERO, /* 147 order. */
|
||||||
OP_BRAPOSZERO, /* 142 */
|
OP_BRAPOSZERO, /* 148 */
|
||||||
|
|
||||||
/* These are backtracking control verbs */
|
/* These are backtracking control verbs */
|
||||||
|
|
||||||
OP_MARK, /* 143 always has an argument */
|
OP_MARK, /* 149 always has an argument */
|
||||||
OP_PRUNE, /* 144 */
|
OP_PRUNE, /* 150 */
|
||||||
OP_PRUNE_ARG, /* 145 same, but with argument */
|
OP_PRUNE_ARG, /* 151 same, but with argument */
|
||||||
OP_SKIP, /* 146 */
|
OP_SKIP, /* 152 */
|
||||||
OP_SKIP_ARG, /* 147 same, but with argument */
|
OP_SKIP_ARG, /* 153 same, but with argument */
|
||||||
OP_THEN, /* 148 */
|
OP_THEN, /* 154 */
|
||||||
OP_THEN_ARG, /* 149 same, but with argument */
|
OP_THEN_ARG, /* 155 same, but with argument */
|
||||||
OP_COMMIT, /* 150 */
|
OP_COMMIT, /* 156 */
|
||||||
|
|
||||||
/* These are forced failure and success verbs */
|
/* These are forced failure and success verbs */
|
||||||
|
|
||||||
OP_FAIL, /* 151 */
|
OP_FAIL, /* 157 */
|
||||||
OP_ACCEPT, /* 152 */
|
OP_ACCEPT, /* 158 */
|
||||||
OP_ASSERT_ACCEPT, /* 153 Used inside assertions */
|
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
|
||||||
OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */
|
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
|
||||||
|
|
||||||
/* This is used to skip a subpattern with a {0} quantifier */
|
/* This is used to skip a subpattern with a {0} quantifier */
|
||||||
|
|
||||||
OP_SKIPZERO, /* 155 */
|
OP_SKIPZERO, /* 161 */
|
||||||
|
|
||||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||||
are the correct length, in order to catch updating errors - there have been
|
are the correct length, in order to catch updating errors - there have been
|
||||||
@ -2137,7 +2187,8 @@ enum {
|
|||||||
|
|
||||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||||
definitions that follow must also be updated to match. There are also tables
|
definitions that follow must also be updated to match. There are also tables
|
||||||
called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
|
called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in
|
||||||
|
pcre_dfa_exec.c that must be updated. */
|
||||||
|
|
||||||
|
|
||||||
/* This macro defines textual names for all the opcodes. These are used only
|
/* This macro defines textual names for all the opcodes. These are used only
|
||||||
@ -2150,7 +2201,7 @@ some cases doesn't actually use these names at all). */
|
|||||||
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
||||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||||
"extuni", "\\Z", "\\z", \
|
"extuni", "\\Z", "\\z", \
|
||||||
"^", "^", "$", "$", "char", "chari", "not", "noti", \
|
"$", "$", "^", "^", "char", "chari", "not", "noti", \
|
||||||
"*", "*?", "+", "+?", "?", "??", \
|
"*", "*?", "+", "+?", "?", "??", \
|
||||||
"{", "{", "{", \
|
"{", "{", "{", \
|
||||||
"*+","++", "?+", "{", \
|
"*+","++", "?+", "{", \
|
||||||
@ -2166,7 +2217,8 @@ some cases doesn't actually use these names at all). */
|
|||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||||
"*+","++", "?+", "{", \
|
"*+","++", "?+", "{", \
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||||
"class", "nclass", "xclass", "Ref", "Refi", \
|
"*+","++", "?+", "{", \
|
||||||
|
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
|
||||||
"Recurse", "Callout", \
|
"Recurse", "Callout", \
|
||||||
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
||||||
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
|
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
|
||||||
@ -2175,7 +2227,7 @@ some cases doesn't actually use these names at all). */
|
|||||||
"Cond", \
|
"Cond", \
|
||||||
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
||||||
"SCond", \
|
"SCond", \
|
||||||
"Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
|
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
|
||||||
"Brazero", "Braminzero", "Braposzero", \
|
"Brazero", "Braminzero", "Braposzero", \
|
||||||
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||||
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
|
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
|
||||||
@ -2200,7 +2252,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||||||
3, 3, /* \P, \p */ \
|
3, 3, /* \P, \p */ \
|
||||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||||
1, /* \X */ \
|
1, /* \X */ \
|
||||||
1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \
|
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \
|
||||||
2, /* Char - the minimum length */ \
|
2, /* Char - the minimum length */ \
|
||||||
2, /* Chari - the minimum length */ \
|
2, /* Chari - the minimum length */ \
|
||||||
2, /* not */ \
|
2, /* not */ \
|
||||||
@ -2231,11 +2283,14 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||||||
/* Character class & ref repeats */ \
|
/* Character class & ref repeats */ \
|
||||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||||
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
|
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
|
||||||
|
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
|
||||||
1+(32/sizeof(pcre_uchar)), /* CLASS */ \
|
1+(32/sizeof(pcre_uchar)), /* CLASS */ \
|
||||||
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \
|
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \
|
||||||
0, /* XCLASS - variable length */ \
|
0, /* XCLASS - variable length */ \
|
||||||
1+IMM2_SIZE, /* REF */ \
|
1+IMM2_SIZE, /* REF */ \
|
||||||
1+IMM2_SIZE, /* REFI */ \
|
1+IMM2_SIZE, /* REFI */ \
|
||||||
|
1+2*IMM2_SIZE, /* DNREF */ \
|
||||||
|
1+2*IMM2_SIZE, /* DNREFI */ \
|
||||||
1+LINK_SIZE, /* RECURSE */ \
|
1+LINK_SIZE, /* RECURSE */ \
|
||||||
2+2*LINK_SIZE, /* CALLOUT */ \
|
2+2*LINK_SIZE, /* CALLOUT */ \
|
||||||
1+LINK_SIZE, /* Alt */ \
|
1+LINK_SIZE, /* Alt */ \
|
||||||
@ -2260,8 +2315,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
|
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
|
||||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
|
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
|
||||||
1+LINK_SIZE, /* SCOND */ \
|
1+LINK_SIZE, /* SCOND */ \
|
||||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \
|
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
||||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \
|
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
||||||
1, /* DEF */ \
|
1, /* DEF */ \
|
||||||
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
||||||
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||||
@ -2270,8 +2325,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||||||
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||||
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
|
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
|
||||||
|
|
||||||
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
|
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||||
condition. */
|
|
||||||
|
|
||||||
#define RREF_ANY 0xffff
|
#define RREF_ANY 0xffff
|
||||||
|
|
||||||
@ -2286,9 +2340,11 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
|||||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||||
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
|
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
|
||||||
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERRCOUNT };
|
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
|
||||||
|
ERR80, ERR81, ERR82, ERR83, ERR84, ERRCOUNT };
|
||||||
|
|
||||||
/* JIT compiling modes. The function list is indexed by them. */
|
/* JIT compiling modes. The function list is indexed by them. */
|
||||||
|
|
||||||
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
|
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
|
||||||
JIT_NUMBER_OF_COMPILE_MODES };
|
JIT_NUMBER_OF_COMPILE_MODES };
|
||||||
|
|
||||||
@ -2406,6 +2462,15 @@ typedef struct open_capitem {
|
|||||||
pcre_uint16 flag; /* Set TRUE if recursive back ref */
|
pcre_uint16 flag; /* Set TRUE if recursive back ref */
|
||||||
} open_capitem;
|
} open_capitem;
|
||||||
|
|
||||||
|
/* Structure for building a list of named groups during the first pass of
|
||||||
|
compiling. */
|
||||||
|
|
||||||
|
typedef struct named_group {
|
||||||
|
const pcre_uchar *name; /* Points to the name in the pattern */
|
||||||
|
int length; /* Length of the name */
|
||||||
|
pcre_uint32 number; /* Group number */
|
||||||
|
} named_group;
|
||||||
|
|
||||||
/* Structure for passing "static" information around between the functions
|
/* Structure for passing "static" information around between the functions
|
||||||
doing the compiling, so that they are thread-safe. */
|
doing the compiling, so that they are thread-safe. */
|
||||||
|
|
||||||
@ -2418,17 +2483,21 @@ typedef struct compile_data {
|
|||||||
const pcre_uchar *start_code; /* The start of the compiled code */
|
const pcre_uchar *start_code; /* The start of the compiled code */
|
||||||
const pcre_uchar *start_pattern; /* The start of the pattern */
|
const pcre_uchar *start_pattern; /* The start of the pattern */
|
||||||
const pcre_uchar *end_pattern; /* The end of the pattern */
|
const pcre_uchar *end_pattern; /* The end of the pattern */
|
||||||
open_capitem *open_caps; /* Chain of open capture items */
|
|
||||||
pcre_uchar *hwm; /* High watermark of workspace */
|
pcre_uchar *hwm; /* High watermark of workspace */
|
||||||
|
open_capitem *open_caps; /* Chain of open capture items */
|
||||||
|
named_group *named_groups; /* Points to vector in pre-compile */
|
||||||
pcre_uchar *name_table; /* The name/number table */
|
pcre_uchar *name_table; /* The name/number table */
|
||||||
int names_found; /* Number of entries so far */
|
int names_found; /* Number of entries so far */
|
||||||
int name_entry_size; /* Size of each entry */
|
int name_entry_size; /* Size of each entry */
|
||||||
|
int named_group_list_size; /* Number of entries in the list */
|
||||||
int workspace_size; /* Size of workspace */
|
int workspace_size; /* Size of workspace */
|
||||||
unsigned int bracount; /* Count of capturing parens as we compile */
|
unsigned int bracount; /* Count of capturing parens as we compile */
|
||||||
int final_bracount; /* Saved value after first pass */
|
int final_bracount; /* Saved value after first pass */
|
||||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||||
int top_backref; /* Maximum back reference */
|
int top_backref; /* Maximum back reference */
|
||||||
unsigned int backref_map; /* Bitmap of low back refs */
|
unsigned int backref_map; /* Bitmap of low back refs */
|
||||||
|
unsigned int namedrefcount; /* Number of backreferences by name */
|
||||||
|
int parens_depth; /* Depth of nested parentheses */
|
||||||
int assert_depth; /* Depth of nested assertions */
|
int assert_depth; /* Depth of nested assertions */
|
||||||
pcre_uint32 external_options; /* External (initial) options */
|
pcre_uint32 external_options; /* External (initial) options */
|
||||||
pcre_uint32 external_flags; /* External flag bits to be set */
|
pcre_uint32 external_flags; /* External flag bits to be set */
|
||||||
@ -2436,6 +2505,7 @@ typedef struct compile_data {
|
|||||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||||
|
BOOL dupnames; /* Duplicate names exist */
|
||||||
int nltype; /* Newline type */
|
int nltype; /* Newline type */
|
||||||
int nllen; /* Newline string length */
|
int nllen; /* Newline string length */
|
||||||
pcre_uchar nl[4]; /* Newline string when fixed length */
|
pcre_uchar nl[4]; /* Newline string when fixed length */
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -501,6 +501,22 @@ static struct regression_test_case regression_test_cases[] = {
|
|||||||
{ MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
|
{ MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
|
||||||
{ PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
{ PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
||||||
{ CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
|
{ CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||||
|
{ MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||||
|
{ CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||||
|
{ CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||||
|
{ CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||||
|
{ CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||||
|
|
||||||
/* Assertions. */
|
/* Assertions. */
|
||||||
{ MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
{ MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
||||||
@ -1374,7 +1390,7 @@ static int regression_tests(void)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
|
/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
|
||||||
return_value8[0], return_value16[0],
|
return_value8[0], return_value16[0], return_value32[0],
|
||||||
ovector8_1[0], ovector8_1[1],
|
ovector8_1[0], ovector8_1[1],
|
||||||
ovector16_1[0], ovector16_1[1],
|
ovector16_1[0], ovector16_1[1],
|
||||||
ovector32_1[0], ovector32_1[1],
|
ovector32_1[0], ovector32_1[1],
|
||||||
|
@ -98,13 +98,17 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
|
|||||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||||
|
|
||||||
/* Then the character class tables. Don't try to be clever and save effort on
|
/* Then the character class tables. Don't try to be clever and save effort on
|
||||||
exclusive ones - in some locales things may be different. Note that the table
|
exclusive ones - in some locales things may be different.
|
||||||
for "space" includes everything "isspace" gives, including VT in the default
|
|
||||||
locale. This makes it work for the POSIX class [:space:]. Note also that it is
|
Note that the table for "space" includes everything "isspace" gives, including
|
||||||
possible for a character to be alnum or alpha without being lower or upper,
|
VT in the default locale. This makes it work for the POSIX class [:space:].
|
||||||
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
|
From release 8.34 it is also correct for Perl space, because Perl added VT at
|
||||||
least under Debian Linux's locales as of 12/2005). So we must test for alnum
|
release 5.18.
|
||||||
specially. */
|
|
||||||
|
Note also that it is possible for a character to be alnum or alpha without
|
||||||
|
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
|
||||||
|
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
|
||||||
|
test for alnum specially. */
|
||||||
|
|
||||||
memset(p, 0, cbit_length);
|
memset(p, 0, cbit_length);
|
||||||
for (i = 0; i < 256; i++)
|
for (i = 0; i < 256; i++)
|
||||||
@ -123,14 +127,15 @@ for (i = 0; i < 256; i++)
|
|||||||
}
|
}
|
||||||
p += cbit_length;
|
p += cbit_length;
|
||||||
|
|
||||||
/* Finally, the character type table. In this, we exclude VT from the white
|
/* Finally, the character type table. In this, we used to exclude VT from the
|
||||||
space chars, because Perl doesn't recognize it as such for \s and for comments
|
white space chars, because Perl didn't recognize it as such for \s and for
|
||||||
within regexes. */
|
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
|
||||||
|
at release 8.34. */
|
||||||
|
|
||||||
for (i = 0; i < 256; i++)
|
for (i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
int x = 0;
|
int x = 0;
|
||||||
if (i != CHAR_VT && isspace(i)) x += ctype_space;
|
if (isspace(i)) x += ctype_space;
|
||||||
if (isalpha(i)) x += ctype_letter;
|
if (isalpha(i)) x += ctype_letter;
|
||||||
if (isdigit(i)) x += ctype_digit;
|
if (isdigit(i)) x += ctype_digit;
|
||||||
if (isxdigit(i)) x += ctype_xdigit;
|
if (isxdigit(i)) x += ctype_xdigit;
|
||||||
|
@ -425,10 +425,19 @@ for(;;)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CREF:
|
case OP_CREF:
|
||||||
case OP_NCREF:
|
|
||||||
fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
|
fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OP_DNCREF:
|
||||||
|
{
|
||||||
|
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
|
||||||
|
IMM2_SIZE;
|
||||||
|
fprintf(f, " %s Cond ref <", flag);
|
||||||
|
print_puchar(f, entry);
|
||||||
|
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case OP_RREF:
|
case OP_RREF:
|
||||||
c = GET2(code, 1);
|
c = GET2(code, 1);
|
||||||
if (c == RREF_ANY)
|
if (c == RREF_ANY)
|
||||||
@ -437,12 +446,14 @@ for(;;)
|
|||||||
fprintf(f, " Cond recurse %d", c);
|
fprintf(f, " Cond recurse %d", c);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_NRREF:
|
case OP_DNRREF:
|
||||||
c = GET2(code, 1);
|
{
|
||||||
if (c == RREF_ANY)
|
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
|
||||||
fprintf(f, " Cond nrecurse any");
|
IMM2_SIZE;
|
||||||
else
|
fprintf(f, " %s Cond recurse <", flag);
|
||||||
fprintf(f, " Cond nrecurse %d", c);
|
print_puchar(f, entry);
|
||||||
|
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_DEF:
|
case OP_DEF:
|
||||||
@ -598,6 +609,20 @@ for(;;)
|
|||||||
ccode = code + priv_OP_lengths[*code];
|
ccode = code + priv_OP_lengths[*code];
|
||||||
goto CLASS_REF_REPEAT;
|
goto CLASS_REF_REPEAT;
|
||||||
|
|
||||||
|
case OP_DNREFI:
|
||||||
|
flag = "/i";
|
||||||
|
/* Fall through */
|
||||||
|
case OP_DNREF:
|
||||||
|
{
|
||||||
|
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
|
||||||
|
IMM2_SIZE;
|
||||||
|
fprintf(f, " %s \\k<", flag);
|
||||||
|
print_puchar(f, entry);
|
||||||
|
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||||
|
}
|
||||||
|
ccode = code + priv_OP_lengths[*code];
|
||||||
|
goto CLASS_REF_REPEAT;
|
||||||
|
|
||||||
case OP_CALLOUT:
|
case OP_CALLOUT:
|
||||||
fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
|
fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
|
||||||
GET(code, 2 + LINK_SIZE));
|
GET(code, 2 + LINK_SIZE));
|
||||||
@ -608,9 +633,9 @@ for(;;)
|
|||||||
print_prop(f, code, " ", "");
|
print_prop(f, code, " ", "");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no
|
/* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
|
||||||
harm in having this code always here, and it makes it less messy without
|
in having this code always here, and it makes it less messy without all
|
||||||
all those #ifdefs. */
|
those #ifdefs. */
|
||||||
|
|
||||||
case OP_CLASS:
|
case OP_CLASS:
|
||||||
case OP_NCLASS:
|
case OP_NCLASS:
|
||||||
@ -671,26 +696,51 @@ for(;;)
|
|||||||
pcre_uchar ch;
|
pcre_uchar ch;
|
||||||
while ((ch = *ccode++) != XCL_END)
|
while ((ch = *ccode++) != XCL_END)
|
||||||
{
|
{
|
||||||
if (ch == XCL_PROP)
|
BOOL not = FALSE;
|
||||||
{
|
const char *notch = "";
|
||||||
unsigned int ptype = *ccode++;
|
|
||||||
unsigned int pvalue = *ccode++;
|
switch(ch)
|
||||||
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
|
|
||||||
}
|
|
||||||
else if (ch == XCL_NOTPROP)
|
|
||||||
{
|
|
||||||
unsigned int ptype = *ccode++;
|
|
||||||
unsigned int pvalue = *ccode++;
|
|
||||||
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
case XCL_NOTPROP:
|
||||||
|
not = TRUE;
|
||||||
|
notch = "^";
|
||||||
|
/* Fall through */
|
||||||
|
|
||||||
|
case XCL_PROP:
|
||||||
|
{
|
||||||
|
unsigned int ptype = *ccode++;
|
||||||
|
unsigned int pvalue = *ccode++;
|
||||||
|
|
||||||
|
switch(ptype)
|
||||||
|
{
|
||||||
|
case PT_PXGRAPH:
|
||||||
|
fprintf(f, "[:%sgraph:]", notch);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PXPRINT:
|
||||||
|
fprintf(f, "[:%sprint:]", notch);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_PXPUNCT:
|
||||||
|
fprintf(f, "[:%spunct:]", notch);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
fprintf(f, "\\%c{%s}", (not? 'P':'p'),
|
||||||
|
get_ucpname(ptype, pvalue));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
ccode += 1 + print_char(f, ccode, utf);
|
ccode += 1 + print_char(f, ccode, utf);
|
||||||
if (ch == XCL_RANGE)
|
if (ch == XCL_RANGE)
|
||||||
{
|
{
|
||||||
fprintf(f, "-");
|
fprintf(f, "-");
|
||||||
ccode += 1 + print_char(f, ccode, utf);
|
ccode += 1 + print_char(f, ccode, utf);
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -710,17 +760,22 @@ for(;;)
|
|||||||
case OP_CRMINPLUS:
|
case OP_CRMINPLUS:
|
||||||
case OP_CRQUERY:
|
case OP_CRQUERY:
|
||||||
case OP_CRMINQUERY:
|
case OP_CRMINQUERY:
|
||||||
|
case OP_CRPOSSTAR:
|
||||||
|
case OP_CRPOSPLUS:
|
||||||
|
case OP_CRPOSQUERY:
|
||||||
fprintf(f, "%s", priv_OP_names[*ccode]);
|
fprintf(f, "%s", priv_OP_names[*ccode]);
|
||||||
extra += priv_OP_lengths[*ccode];
|
extra += priv_OP_lengths[*ccode];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_CRRANGE:
|
case OP_CRRANGE:
|
||||||
case OP_CRMINRANGE:
|
case OP_CRMINRANGE:
|
||||||
|
case OP_CRPOSRANGE:
|
||||||
min = GET2(ccode,1);
|
min = GET2(ccode,1);
|
||||||
max = GET2(ccode,1 + IMM2_SIZE);
|
max = GET2(ccode,1 + IMM2_SIZE);
|
||||||
if (max == 0) fprintf(f, "{%u,}", min);
|
if (max == 0) fprintf(f, "{%u,}", min);
|
||||||
else fprintf(f, "{%u,%u}", min, max);
|
else fprintf(f, "{%u,%u}", min, max);
|
||||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||||
|
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
|
||||||
extra += priv_OP_lengths[*ccode];
|
extra += priv_OP_lengths[*ccode];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user