mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Merge remote-tracking branch 'merge/merge-pcre' into 10.2
This commit is contained in:
10
pcre/AUTHORS
10
pcre/AUTHORS
@ -2,13 +2,13 @@ THE MAIN PCRE LIBRARY
|
||||
---------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2020 University of Cambridge
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
||||
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
||||
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -67,10 +67,17 @@
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
# 2016-03-01 PH applied Chris Wilson's patch for MSVC static build
|
||||
# 2016-06-24 PH applied Chris Wilson's revised patch (adds a separate option)
|
||||
# 2021-06-14 PH changed CMAKE_MODULE_PATH definition to add, not replace
|
||||
# 2021-06-14 PH applied Wolfgang Stöggl's patch for generating pcre-config and
|
||||
# libpcre*.pc files (Bugzilla #2583)
|
||||
|
||||
PROJECT(PCRE C CXX)
|
||||
|
||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||
# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
|
||||
# on the command line.
|
||||
# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||
|
||||
LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
# Enable Unicode properties
|
||||
OPTION(PCRE_SUPPORT_UNICODE_PROPERTIES "Unicode properties" ON)
|
||||
@ -83,6 +90,7 @@ INCLUDE(CheckIncludeFile)
|
||||
INCLUDE(CheckIncludeFileCXX)
|
||||
INCLUDE(CheckFunctionExists)
|
||||
INCLUDE(CheckTypeSize)
|
||||
INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
|
||||
|
||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
|
||||
@ -255,7 +263,7 @@ foreach(configure_line ${configure_lines})
|
||||
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
||||
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
||||
if (NOT ${_substitution_variable_upper})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
|
||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MATCHED_STRING ${configure_line})
|
||||
if (CMAKE_MATCH_1)
|
||||
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||
endif()
|
||||
@ -469,7 +477,6 @@ MARK_AS_ADVANCED(PCRE_POSIX_MALLOC_THRESHOLD)
|
||||
MARK_AS_ADVANCED(PCRE_SUPPORT_BSR_ANYCRLF)
|
||||
MARK_AS_ADVANCED(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||
|
||||
|
||||
IF(PCRE_SHOW_REPORT)
|
||||
STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
|
||||
IF (CMAKE_C_FLAGS)
|
||||
@ -480,7 +487,7 @@ IF(PCRE_SHOW_REPORT)
|
||||
ENDIF(CMAKE_CXX_FLAGS)
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS "PCRE configuration summary:")
|
||||
MESSAGE(STATUS "PCRE-${PCRE_MAJOR}.${PCRE_MINOR} configuration summary:")
|
||||
MESSAGE(STATUS "")
|
||||
# MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
|
||||
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
||||
|
@ -1,8 +1,25 @@
|
||||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All
|
||||
development is happening in the PCRE2 10.xx series.
|
||||
Note that the PCRE 8.xx series (PCRE1) is now at end of life. All development
|
||||
is happening in the PCRE2 10.xx series.
|
||||
|
||||
|
||||
Version 8.45 15-June-2021
|
||||
-------------------------
|
||||
|
||||
This is the final release of PCRE1. A few minor tidies are included.
|
||||
|
||||
1. CMakeLists.txt has two user-supplied patches applied, one to allow for the
|
||||
setting of CMAKE_MODULE_PATH, and the other to support the generation of the pcre-config
|
||||
file and libpcre*.pc files.
|
||||
|
||||
2. There was a memory leak if a compile error occurred when there were more
|
||||
than 20 named groups (Bugzilla #2613).
|
||||
|
||||
3. Fixed some typos in code and documentation.
|
||||
|
||||
4. Fixed a small (*MARK) bug in the interpreter (Bugzilla #2771).
|
||||
|
||||
|
||||
Version 8.44 12-February-2020
|
||||
|
10
pcre/LICENCE
10
pcre/LICENCE
@ -19,13 +19,13 @@ THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2020 University of Cambridge
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
@ -36,7 +36,7 @@ Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
||||
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@ Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
||||
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
10
pcre/NEWS
10
pcre/NEWS
@ -1,8 +1,14 @@
|
||||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Note that this library (now called PCRE1) is now being maintained for bug fixes
|
||||
only. New projects are advised to use the new PCRE2 libraries.
|
||||
Note that this library (now called PCRE1) is no longer being maintained. New
|
||||
projects are advised to use the PCRE2 libraries.
|
||||
|
||||
|
||||
Release 8.45 15-June-2021
|
||||
-----------------------------
|
||||
|
||||
This is the final PCRE1 release. A very few small issues have been fixed.
|
||||
|
||||
|
||||
Release 8.44 12-February-2020
|
||||
|
@ -5,8 +5,8 @@ NOTE: This document relates to PCRE releases that use the original API, with
|
||||
library names libpcre, libpcre16, and libpcre32. January 2015 saw the first
|
||||
release of a new API, known as PCRE2, with release numbers starting at 10.00
|
||||
and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries
|
||||
(now called PCRE1) are still being maintained for bug fixes, but there will be
|
||||
no new development. New projects are advised to use the new PCRE2 libraries.
|
||||
(now called PCRE1) are now at end of life, and 8.45 is the final release. New
|
||||
projects are advised to use the new PCRE2 libraries.
|
||||
|
||||
|
||||
This document contains the following sections:
|
||||
@ -768,6 +768,6 @@ Everything, source and executable, is in EBCDIC and native z/OS file formats.
|
||||
However, this software is not maintained and will not be upgraded. If you are
|
||||
new to PCRE you should be looking at PCRE2 (version 10.30 or later).
|
||||
|
||||
===============================
|
||||
Last Updated: 13 September 2017
|
||||
===============================
|
||||
==========================
|
||||
Last Updated: 15 June 2021
|
||||
==========================
|
||||
|
11
pcre/README
11
pcre/README
@ -5,9 +5,8 @@ NOTE: This set of files relates to PCRE releases that use the original API,
|
||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||
first release of a new API, known as PCRE2, with release numbers starting at
|
||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
||||
there will be no new development. New projects are advised to use the new PCRE2
|
||||
libraries.
|
||||
libraries (now called PCRE1) are now at end of life, and 8.45 is the final
|
||||
release. New projects are advised to use the new PCRE2 libraries.
|
||||
|
||||
|
||||
The latest release of PCRE1 is always available in three alternative formats
|
||||
@ -998,6 +997,6 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
||||
RunTest.bat a script for running tests under Windows
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 12 February 2020
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 15 June 2021
|
||||
|
@ -9,17 +9,17 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre_major, [8])
|
||||
m4_define(pcre_minor, [44])
|
||||
m4_define(pcre_minor, [45])
|
||||
m4_define(pcre_prerelease, [])
|
||||
m4_define(pcre_date, [2020-02-12])
|
||||
m4_define(pcre_date, [2021-06-15])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre_version, [3:12:2])
|
||||
m4_define(libpcre16_version, [2:12:2])
|
||||
m4_define(libpcre32_version, [0:12:0])
|
||||
m4_define(libpcre_version, [3:13:2])
|
||||
m4_define(libpcre16_version, [2:13:2])
|
||||
m4_define(libpcre32_version, [0:13:0])
|
||||
m4_define(libpcreposix_version, [0:7:0])
|
||||
m4_define(libpcrecpp_version, [0:2:0])
|
||||
|
||||
|
@ -5,8 +5,8 @@ NOTE: This document relates to PCRE releases that use the original API, with
|
||||
library names libpcre, libpcre16, and libpcre32. January 2015 saw the first
|
||||
release of a new API, known as PCRE2, with release numbers starting at 10.00
|
||||
and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries
|
||||
(now called PCRE1) are still being maintained for bug fixes, but there will be
|
||||
no new development. New projects are advised to use the new PCRE2 libraries.
|
||||
(now called PCRE1) are now at end of life, and 8.45 is the final release. New
|
||||
projects are advised to use the new PCRE2 libraries.
|
||||
|
||||
|
||||
This document contains the following sections:
|
||||
@ -768,6 +768,6 @@ Everything, source and executable, is in EBCDIC and native z/OS file formats.
|
||||
However, this software is not maintained and will not be upgraded. If you are
|
||||
new to PCRE you should be looking at PCRE2 (version 10.30 or later).
|
||||
|
||||
===============================
|
||||
Last Updated: 13 September 2017
|
||||
===============================
|
||||
==========================
|
||||
Last Updated: 15 June 2021
|
||||
==========================
|
||||
|
@ -5,9 +5,8 @@ NOTE: This set of files relates to PCRE releases that use the original API,
|
||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||
first release of a new API, known as PCRE2, with release numbers starting at
|
||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
||||
there will be no new development. New projects are advised to use the new PCRE2
|
||||
libraries.
|
||||
libraries (now called PCRE1) are now at end of life, and 8.45 is the final
|
||||
release. New projects are advised to use the new PCRE2 libraries.
|
||||
|
||||
|
||||
The latest release of PCRE1 is always available in three alternative formats
|
||||
@ -998,6 +997,6 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
||||
RunTest.bat a script for running tests under Windows
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 12 February 2020
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 15 June 2021
|
||||
|
@ -22,13 +22,12 @@ man page, in case the conversion went wrong.
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PLEASE TAKE NOTE</a><br>
|
||||
<P>
|
||||
This document relates to PCRE releases that use the original API,
|
||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||
first release of a new API, known as PCRE2, with release numbers starting at
|
||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
||||
there will be no new development. New projects are advised to use the new PCRE2
|
||||
libraries.
|
||||
This document relates to PCRE releases that use the original API, with library
|
||||
names libpcre, libpcre16, and libpcre32. January 2015 saw the first release of
|
||||
a new API, known as PCRE2, with release numbers starting at 10.00 and library
|
||||
names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries (now called
|
||||
PCRE1) are now at end of life, and 8.45 is the final release. New projects are
|
||||
advised to use the new PCRE2 libraries.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">INTRODUCTION</a><br>
|
||||
<P>
|
||||
@ -215,9 +214,9 @@ two digits 10, at the domain cam.ac.uk.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 10 February 2015
|
||||
Last updated: 14 June 2021
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
Copyright © 1997-2021 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
@ -1718,7 +1718,7 @@ very long time, and so the <i>match_limit</i> value is also used in this case
|
||||
<P>
|
||||
The default value for the limit can be set when PCRE is built; the default
|
||||
default is 10 million, which handles all but the most extreme cases. You can
|
||||
override the default by suppling <b>pcre_exec()</b> with a <b>pcre_extra</b>
|
||||
override the default by supplying <b>pcre_exec()</b> with a <b>pcre_extra</b>
|
||||
block in which <i>match_limit</i> is set, and PCRE_EXTRA_MATCH_LIMIT is set in
|
||||
the <i>flags</i> field. If the limit is exceeded, <b>pcre_exec()</b> returns
|
||||
PCRE_ERROR_MATCHLIMIT.
|
||||
@ -1749,7 +1749,7 @@ and is ignored, when matching is done using JIT compiled code.
|
||||
<P>
|
||||
The default value for <i>match_limit_recursion</i> can be set when PCRE is
|
||||
built; the default default is the same value as the default for
|
||||
<i>match_limit</i>. You can override the default by suppling <b>pcre_exec()</b>
|
||||
<i>match_limit</i>. You can override the default by supplying <b>pcre_exec()</b>
|
||||
with a <b>pcre_extra</b> block in which <i>match_limit_recursion</i> is set, and
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the <i>flags</i> field. If the limit
|
||||
is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_RECURSIONLIMIT.
|
||||
@ -2063,10 +2063,10 @@ lookbehind. For example, consider the pattern
|
||||
</pre>
|
||||
which finds occurrences of "iss" in the middle of words. (\B matches only if
|
||||
the current position in the subject is not a word boundary.) When applied to
|
||||
the string "Mississipi" the first call to <b>pcre_exec()</b> finds the first
|
||||
the string "Mississippi" the first call to <b>pcre_exec()</b> finds the first
|
||||
occurrence. If <b>pcre_exec()</b> is called again with just the remainder of the
|
||||
subject, namely "issipi", it does not match, because \B is always false at the
|
||||
start of the subject, which is deemed to be a word boundary. However, if
|
||||
subject, namely "issippi", it does not match, because \B is always false at
|
||||
the start of the subject, which is deemed to be a word boundary. However, if
|
||||
<b>pcre_exec()</b> is passed the entire string again, but with <i>startoffset</i>
|
||||
set to 4, it finds the second occurrence of "iss" because it is able to look
|
||||
behind the starting point to discover that it is preceded by a letter.
|
||||
|
@ -161,7 +161,7 @@ if (rc < 0)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
/* Match succeeded */
|
||||
|
||||
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
||||
|
||||
@ -379,7 +379,7 @@ for (;;)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
/* Match succeeded */
|
||||
|
||||
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
||||
|
||||
|
@ -299,7 +299,7 @@ recommended.
|
||||
This is a suggestion for how a multithreaded program that needs to set up
|
||||
non-default JIT stacks might operate:
|
||||
<pre>
|
||||
During thread initalization
|
||||
During thread initialization
|
||||
thread_local_var = pcre_jit_stack_alloc(...)
|
||||
|
||||
During thread exit
|
||||
|
@ -655,7 +655,7 @@ recognized.
|
||||
<P>
|
||||
It is possible to restrict \R to match only CR, LF, or CRLF (instead of the
|
||||
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
|
||||
either at compile time or when the pattern is matched. (BSR is an abbrevation
|
||||
either at compile time or when the pattern is matched. (BSR is an abbreviation
|
||||
for "backslash R".) This can be made the default when PCRE is built; if this is
|
||||
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
|
||||
It is also possible to specify these settings by starting a pattern string with
|
||||
@ -972,7 +972,7 @@ additional characters according to the following rules for ending a cluster:
|
||||
3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
|
||||
are of five types: L, V, T, LV, and LVT. An L character may be followed by an
|
||||
L, V, LV, or LVT character; an LV or V character may be followed by a V or T
|
||||
character; an LVT or T character may be follwed only by a T character.
|
||||
character; an LVT or T character may be followed only by a T character.
|
||||
</P>
|
||||
<P>
|
||||
4. Do not end before extending characters or spacing marks. Characters with
|
||||
@ -3080,7 +3080,7 @@ successful match. Consider:
|
||||
</pre>
|
||||
If the subject is "aaaac...", after the first match attempt fails (starting at
|
||||
the first character in the string), the starting point skips on to start the
|
||||
next attempt at "c". Note that a possessive quantifer does not have the same
|
||||
next attempt at "c". Note that a possessive quantifier does not have the same
|
||||
effect as this example; although it would suppress backtracking during the
|
||||
first match attempt, the second attempt would start at the second character
|
||||
instead of skipping on to "c".
|
||||
|
@ -157,7 +157,7 @@ is changed to use Unicode properties and they match many more characters.
|
||||
Xan Alphanumeric: union of properties L and N
|
||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||
Xuc Univerally-named character: one that can be
|
||||
Xuc Universally-named character: one that can be
|
||||
represented by a Universal Character Name
|
||||
Xwd Perl word: property Xan or underscore
|
||||
</pre>
|
||||
|
@ -1,16 +1,15 @@
|
||||
.TH PCRE 3 "10 February 2015" "PCRE 8.37"
|
||||
.TH PCRE 3 "14 June 2021" "PCRE 8.45"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions (original API)
|
||||
.SH "PLEASE TAKE NOTE"
|
||||
.rs
|
||||
.sp
|
||||
This document relates to PCRE releases that use the original API,
|
||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||
first release of a new API, known as PCRE2, with release numbers starting at
|
||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
||||
there will be no new development. New projects are advised to use the new PCRE2
|
||||
libraries.
|
||||
This document relates to PCRE releases that use the original API, with library
|
||||
names libpcre, libpcre16, and libpcre32. January 2015 saw the first release of
|
||||
a new API, known as PCRE2, with release numbers starting at 10.00 and library
|
||||
names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries (now called
|
||||
PCRE1) are now at end of life, and 8.45 is the final release. New projects are
|
||||
advised to use the new PCRE2 libraries.
|
||||
.
|
||||
.
|
||||
.SH INTRODUCTION
|
||||
@ -225,6 +224,6 @@ two digits 10, at the domain cam.ac.uk.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 10 February 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 14 June 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
.fi
|
||||
|
@ -21,9 +21,9 @@ PLEASE TAKE NOTE
|
||||
library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||
first release of a new API, known as PCRE2, with release numbers start-
|
||||
ing at 10.00 and library names libpcre2-8, libpcre2-16, and
|
||||
libpcre2-32. The old libraries (now called PCRE1) are still being main-
|
||||
tained for bug fixes, but there will be no new development. New
|
||||
projects are advised to use the new PCRE2 libraries.
|
||||
libpcre2-32. The old libraries (now called PCRE1) are now at end of
|
||||
life, and 8.45 is the final release. New projects are advised to use
|
||||
the new PCRE2 libraries.
|
||||
|
||||
|
||||
INTRODUCTION
|
||||
@ -190,8 +190,8 @@ AUTHOR
|
||||
|
||||
REVISION
|
||||
|
||||
Last updated: 10 February 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 14 June 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -3155,7 +3155,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
||||
|
||||
The default value for the limit can be set when PCRE is built; the de-
|
||||
fault default is 10 million, which handles all but the most extreme
|
||||
cases. You can override the default by suppling pcre_exec() with a
|
||||
cases. You can override the default by supplying pcre_exec() with a
|
||||
pcre_extra block in which match_limit is set, and PCRE_EX-
|
||||
TRA_MATCH_LIMIT is set in the flags field. If the limit is exceeded,
|
||||
pcre_exec() returns PCRE_ERROR_MATCHLIMIT.
|
||||
@ -3183,7 +3183,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
||||
|
||||
The default value for match_limit_recursion can be set when PCRE is
|
||||
built; the default default is the same value as the default for
|
||||
match_limit. You can override the default by suppling pcre_exec() with
|
||||
match_limit. You can override the default by supplying pcre_exec() with
|
||||
a pcre_extra block in which match_limit_recursion is set, and PCRE_EX-
|
||||
TRA_MATCH_LIMIT_RECURSION is set in the flags field. If the limit is
|
||||
exceeded, pcre_exec() returns PCRE_ERROR_RECURSIONLIMIT.
|
||||
@ -3475,9 +3475,9 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
||||
|
||||
which finds occurrences of "iss" in the middle of words. (\B matches
|
||||
only if the current position in the subject is not a word boundary.)
|
||||
When applied to the string "Mississipi" the first call to pcre_exec()
|
||||
When applied to the string "Mississippi" the first call to pcre_exec()
|
||||
finds the first occurrence. If pcre_exec() is called again with just
|
||||
the remainder of the subject, namely "issipi", it does not match, be-
|
||||
the remainder of the subject, namely "issippi", it does not match, be-
|
||||
cause \B is always false at the start of the subject, which is deemed
|
||||
to be a word boundary. However, if pcre_exec() is passed the entire
|
||||
string again, but with startoffset set to 4, it finds the second occur-
|
||||
@ -5292,11 +5292,11 @@ BACKSLASH
|
||||
It is possible to restrict \R to match only CR, LF, or CRLF (instead of
|
||||
the complete set of Unicode line endings) by setting the option
|
||||
PCRE_BSR_ANYCRLF either at compile time or when the pattern is matched.
|
||||
(BSR is an abbrevation for "backslash R".) This can be made the default
|
||||
when PCRE is built; if this is the case, the other behaviour can be re-
|
||||
quested via the PCRE_BSR_UNICODE option. It is also possible to spec-
|
||||
ify these settings by starting a pattern string with one of the follow-
|
||||
ing sequences:
|
||||
(BSR is an abbreviation for "backslash R".) This can be made the de-
|
||||
fault when PCRE is built; if this is the case, the other behaviour can
|
||||
be requested via the PCRE_BSR_UNICODE option. It is also possible to
|
||||
specify these settings by starting a pattern string with one of the
|
||||
following sequences:
|
||||
|
||||
(*BSR_ANYCRLF) CR, LF, or CRLF only
|
||||
(*BSR_UNICODE) any Unicode newline sequence
|
||||
@ -5488,8 +5488,8 @@ BACKSLASH
|
||||
3. Do not break Hangul (a Korean script) syllable sequences. Hangul
|
||||
characters are of five types: L, V, T, LV, and LVT. An L character may
|
||||
be followed by an L, V, LV, or LVT character; an LV or V character may
|
||||
be followed by a V or T character; an LVT or T character may be follwed
|
||||
only by a T character.
|
||||
be followed by a V or T character; an LVT or T character may be fol-
|
||||
lowed only by a T character.
|
||||
|
||||
4. Do not end before extending characters or spacing marks. Characters
|
||||
with the "mark" property always have the "extend" grapheme breaking
|
||||
@ -7494,7 +7494,7 @@ BACKTRACKING CONTROL
|
||||
If the subject is "aaaac...", after the first match attempt fails
|
||||
(starting at the first character in the string), the starting point
|
||||
skips on to start the next attempt at "c". Note that a possessive quan-
|
||||
tifer does not have the same effect as this example; although it would
|
||||
tifier does not have the same effect as this example; although it would
|
||||
suppress backtracking during the first match attempt, the second at-
|
||||
tempt would start at the second character instead of skipping on to
|
||||
"c".
|
||||
@ -7793,7 +7793,7 @@ PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P
|
||||
Xan Alphanumeric: union of properties L and N
|
||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||
Xuc Univerally-named character: one that can be
|
||||
Xuc Universally-named character: one that can be
|
||||
represented by a Universal Character Name
|
||||
Xwd Perl word: property Xan or underscore
|
||||
|
||||
@ -8571,7 +8571,7 @@ CONTROLLING THE JIT STACK
|
||||
This is a suggestion for how a multithreaded program that needs to set
|
||||
up non-default JIT stacks might operate:
|
||||
|
||||
During thread initalization
|
||||
During thread initialization
|
||||
thread_local_var = pcre_jit_stack_alloc(...)
|
||||
|
||||
During thread exit
|
||||
|
@ -1699,7 +1699,7 @@ very long time, and so the \fImatch_limit\fP value is also used in this case
|
||||
.P
|
||||
The default value for the limit can be set when PCRE is built; the default
|
||||
default is 10 million, which handles all but the most extreme cases. You can
|
||||
override the default by suppling \fBpcre_exec()\fP with a \fBpcre_extra\fP
|
||||
override the default by supplying \fBpcre_exec()\fP with a \fBpcre_extra\fP
|
||||
block in which \fImatch_limit\fP is set, and PCRE_EXTRA_MATCH_LIMIT is set in
|
||||
the \fIflags\fP field. If the limit is exceeded, \fBpcre_exec()\fP returns
|
||||
PCRE_ERROR_MATCHLIMIT.
|
||||
@ -1726,7 +1726,7 @@ and is ignored, when matching is done using JIT compiled code.
|
||||
.P
|
||||
The default value for \fImatch_limit_recursion\fP can be set when PCRE is
|
||||
built; the default default is the same value as the default for
|
||||
\fImatch_limit\fP. You can override the default by suppling \fBpcre_exec()\fP
|
||||
\fImatch_limit\fP. You can override the default by supplying \fBpcre_exec()\fP
|
||||
with a \fBpcre_extra\fP block in which \fImatch_limit_recursion\fP is set, and
|
||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the \fIflags\fP field. If the limit
|
||||
is exceeded, \fBpcre_exec()\fP returns PCRE_ERROR_RECURSIONLIMIT.
|
||||
@ -2044,10 +2044,10 @@ lookbehind. For example, consider the pattern
|
||||
.sp
|
||||
which finds occurrences of "iss" in the middle of words. (\eB matches only if
|
||||
the current position in the subject is not a word boundary.) When applied to
|
||||
the string "Mississipi" the first call to \fBpcre_exec()\fP finds the first
|
||||
the string "Mississippi" the first call to \fBpcre_exec()\fP finds the first
|
||||
occurrence. If \fBpcre_exec()\fP is called again with just the remainder of the
|
||||
subject, namely "issipi", it does not match, because \eB is always false at the
|
||||
start of the subject, which is deemed to be a word boundary. However, if
|
||||
subject, namely "issippi", it does not match, because \eB is always false at
|
||||
the start of the subject, which is deemed to be a word boundary. However, if
|
||||
\fBpcre_exec()\fP is passed the entire string again, but with \fIstartoffset\fP
|
||||
set to 4, it finds the second occurrence of "iss" because it is able to look
|
||||
behind the starting point to discover that it is preceded by a letter.
|
||||
|
@ -161,7 +161,7 @@ if (rc < 0)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
/* Match succeeded */
|
||||
|
||||
printf("\enMatch succeeded at offset %d\en", ovector[0]);
|
||||
|
||||
@ -379,7 +379,7 @@ for (;;)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
/* Match succeeded */
|
||||
|
||||
printf("\enMatch succeeded again at offset %d\en", ovector[0]);
|
||||
|
||||
|
@ -285,7 +285,7 @@ recommended.
|
||||
This is a suggestion for how a multithreaded program that needs to set up
|
||||
non-default JIT stacks might operate:
|
||||
.sp
|
||||
During thread initalization
|
||||
During thread initialization
|
||||
thread_local_var = pcre_jit_stack_alloc(...)
|
||||
.sp
|
||||
During thread exit
|
||||
|
@ -656,7 +656,7 @@ recognized.
|
||||
.P
|
||||
It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the
|
||||
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
|
||||
either at compile time or when the pattern is matched. (BSR is an abbrevation
|
||||
either at compile time or when the pattern is matched. (BSR is an abbreviation
|
||||
for "backslash R".) This can be made the default when PCRE is built; if this is
|
||||
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
|
||||
It is also possible to specify these settings by starting a pattern string with
|
||||
@ -968,7 +968,7 @@ additional characters according to the following rules for ending a cluster:
|
||||
3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
|
||||
are of five types: L, V, T, LV, and LVT. An L character may be followed by an
|
||||
L, V, LV, or LVT character; an LV or V character may be followed by a V or T
|
||||
character; an LVT or T character may be follwed only by a T character.
|
||||
character; an LVT or T character may be followed only by a T character.
|
||||
.P
|
||||
4. Do not end before extending characters or spacing marks. Characters with
|
||||
the "mark" property always have the "extend" grapheme breaking property.
|
||||
@ -3115,7 +3115,7 @@ successful match. Consider:
|
||||
.sp
|
||||
If the subject is "aaaac...", after the first match attempt fails (starting at
|
||||
the first character in the string), the starting point skips on to start the
|
||||
next attempt at "c". Note that a possessive quantifer does not have the same
|
||||
next attempt at "c". Note that a possessive quantifier does not have the same
|
||||
effect as this example; although it would suppress backtracking during the
|
||||
first match attempt, the second attempt would start at the second character
|
||||
instead of skipping on to "c".
|
||||
|
@ -123,7 +123,7 @@ is changed to use Unicode properties and they match many more characters.
|
||||
Xan Alphanumeric: union of properties L and N
|
||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||
Xuc Univerally-named character: one that can be
|
||||
Xuc Universally-named character: one that can be
|
||||
represented by a Universal Character Name
|
||||
Xwd Perl word: property Xan or underscore
|
||||
.sp
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2020 University of Cambridge
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -9102,6 +9102,8 @@ pcre_uchar cworkspace[COMPILE_WORK_SIZE];
|
||||
similar way to cworkspace, it can be expanded using malloc() if necessary. */
|
||||
|
||||
named_group named_groups[NAMED_GROUP_LIST_SIZE];
|
||||
cd->named_groups = named_groups;
|
||||
cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
|
||||
|
||||
/* Set this early so that early errors get offset 0. */
|
||||
|
||||
@ -9375,8 +9377,6 @@ cd->hwm = cworkspace;
|
||||
cd->iscondassert = FALSE;
|
||||
cd->start_workspace = cworkspace;
|
||||
cd->workspace_size = COMPILE_WORK_SIZE;
|
||||
cd->named_groups = named_groups;
|
||||
cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
|
||||
cd->start_pattern = (const pcre_uchar *)pattern;
|
||||
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
|
||||
cd->req_varyopt = 0;
|
||||
@ -9487,6 +9487,7 @@ if (cd->names_found > 0)
|
||||
add_name(cd, ng->name, ng->length, ng->number);
|
||||
if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
|
||||
(PUBL(free))((void *)cd->named_groups);
|
||||
cd->named_group_list_size = 0; /* So we don't free it twice */
|
||||
}
|
||||
|
||||
/* Set up a starting, non-extracting bracket, then compile the expression. On
|
||||
@ -9637,6 +9638,8 @@ if (errorcode != 0)
|
||||
{
|
||||
(PUBL(free))(re);
|
||||
PCRE_EARLY_ERROR_RETURN:
|
||||
if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
|
||||
(PUBL(free))((void *)cd->named_groups);
|
||||
*erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
|
||||
PCRE_EARLY_ERROR_RETURN2:
|
||||
*errorptr = find_error_text(errorcode);
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2018 University of Cambridge
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -764,7 +764,7 @@ for (;;)
|
||||
md->mark = NULL; /* In case previously set by assertion */
|
||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
|
||||
eptrb, RM55);
|
||||
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
|
||||
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT || rrc == MATCH_KETRPOS) &&
|
||||
md->mark == NULL) md->mark = ecode + 2;
|
||||
|
||||
/* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
|
||||
|
@ -144,7 +144,7 @@ if (rc < 0)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
/* Match succeeded */
|
||||
|
||||
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
||||
|
||||
@ -362,7 +362,7 @@ for (;;)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
/* Match succeeded */
|
||||
|
||||
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
||||
|
||||
|
3
pcre/testdata/testinput1
vendored
3
pcre/testdata/testinput1
vendored
@ -5757,4 +5757,7 @@ AbcdCBefgBhiBqz
|
||||
/(?&word)* \. (?<word> \w+ )/xi
|
||||
pokus.hokus
|
||||
|
||||
/(?:A(*:X))*+/K
|
||||
A
|
||||
|
||||
/-- End of testinput1 --/
|
||||
|
5
pcre/testdata/testoutput1
vendored
5
pcre/testdata/testoutput1
vendored
@ -9470,4 +9470,9 @@ No match
|
||||
0: pokus.hokus
|
||||
1: hokus
|
||||
|
||||
/(?:A(*:X))*+/K
|
||||
A
|
||||
0: A
|
||||
MK: X
|
||||
|
||||
/-- End of testinput1 --/
|
||||
|
Reference in New Issue
Block a user