mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Merge remote-tracking branch 'merge/merge-pcre' into 10.2
This commit is contained in:
10
pcre/AUTHORS
10
pcre/AUTHORS
@ -2,13 +2,13 @@ THE MAIN PCRE LIBRARY
|
|||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
Written by: Philip Hazel
|
Written by: Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: Philip.Hazel
|
||||||
Email domain: cam.ac.uk
|
Email domain: gmail.com
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2020 University of Cambridge
|
Copyright (c) 1997-2021 University of Cambridge
|
||||||
All rights reserved
|
All rights reserved
|
||||||
|
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -67,10 +67,17 @@
|
|||||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||||
# 2016-03-01 PH applied Chris Wilson's patch for MSVC static build
|
# 2016-03-01 PH applied Chris Wilson's patch for MSVC static build
|
||||||
# 2016-06-24 PH applied Chris Wilson's revised patch (adds a separate option)
|
# 2016-06-24 PH applied Chris Wilson's revised patch (adds a separate option)
|
||||||
|
# 2021-06-14 PH changed CMAKE_MODULE_PATH definition to add, not replace
|
||||||
|
# 2021-06-14 PH applied Wolfgang Stöggl's patch for generating pcre-config and
|
||||||
|
# libpcre*.pc files (Bugzilla #2583)
|
||||||
|
|
||||||
PROJECT(PCRE C CXX)
|
PROJECT(PCRE C CXX)
|
||||||
|
|
||||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
|
||||||
|
# on the command line.
|
||||||
|
# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
|
||||||
|
|
||||||
|
LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
|
|
||||||
# Enable Unicode properties
|
# Enable Unicode properties
|
||||||
OPTION(PCRE_SUPPORT_UNICODE_PROPERTIES "Unicode properties" ON)
|
OPTION(PCRE_SUPPORT_UNICODE_PROPERTIES "Unicode properties" ON)
|
||||||
@ -83,6 +90,7 @@ INCLUDE(CheckIncludeFile)
|
|||||||
INCLUDE(CheckIncludeFileCXX)
|
INCLUDE(CheckIncludeFileCXX)
|
||||||
INCLUDE(CheckFunctionExists)
|
INCLUDE(CheckFunctionExists)
|
||||||
INCLUDE(CheckTypeSize)
|
INCLUDE(CheckTypeSize)
|
||||||
|
INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR
|
||||||
|
|
||||||
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
|
||||||
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
|
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
|
||||||
@ -255,7 +263,7 @@ foreach(configure_line ${configure_lines})
|
|||||||
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
foreach(_substitution_variable ${SEARCHED_VARIABLES})
|
||||||
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
|
||||||
if (NOT ${_substitution_variable_upper})
|
if (NOT ${_substitution_variable_upper})
|
||||||
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
|
string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MATCHED_STRING ${configure_line})
|
||||||
if (CMAKE_MATCH_1)
|
if (CMAKE_MATCH_1)
|
||||||
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
|
||||||
endif()
|
endif()
|
||||||
@ -469,7 +477,6 @@ MARK_AS_ADVANCED(PCRE_POSIX_MALLOC_THRESHOLD)
|
|||||||
MARK_AS_ADVANCED(PCRE_SUPPORT_BSR_ANYCRLF)
|
MARK_AS_ADVANCED(PCRE_SUPPORT_BSR_ANYCRLF)
|
||||||
MARK_AS_ADVANCED(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
MARK_AS_ADVANCED(PCRE_SUPPORT_UNICODE_PROPERTIES)
|
||||||
|
|
||||||
|
|
||||||
IF(PCRE_SHOW_REPORT)
|
IF(PCRE_SHOW_REPORT)
|
||||||
STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
|
STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
|
||||||
IF (CMAKE_C_FLAGS)
|
IF (CMAKE_C_FLAGS)
|
||||||
@ -480,7 +487,7 @@ IF(PCRE_SHOW_REPORT)
|
|||||||
ENDIF(CMAKE_CXX_FLAGS)
|
ENDIF(CMAKE_CXX_FLAGS)
|
||||||
MESSAGE(STATUS "")
|
MESSAGE(STATUS "")
|
||||||
MESSAGE(STATUS "")
|
MESSAGE(STATUS "")
|
||||||
MESSAGE(STATUS "PCRE configuration summary:")
|
MESSAGE(STATUS "PCRE-${PCRE_MAJOR}.${PCRE_MINOR} configuration summary:")
|
||||||
MESSAGE(STATUS "")
|
MESSAGE(STATUS "")
|
||||||
# MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
|
# MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
|
||||||
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
||||||
|
@ -1,8 +1,25 @@
|
|||||||
ChangeLog for PCRE
|
ChangeLog for PCRE
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All
|
Note that the PCRE 8.xx series (PCRE1) is now at end of life. All development
|
||||||
development is happening in the PCRE2 10.xx series.
|
is happening in the PCRE2 10.xx series.
|
||||||
|
|
||||||
|
|
||||||
|
Version 8.45 15-June-2021
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
This is the final release of PCRE1. A few minor tidies are included.
|
||||||
|
|
||||||
|
1. CMakeLists.txt has two user-supplied patches applied, one to allow for the
|
||||||
|
setting of CMAKE_MODULE_PATH, and the other to support the generation of the pcre-config
|
||||||
|
file and libpcre*.pc files.
|
||||||
|
|
||||||
|
2. There was a memory leak if a compile error occurred when there were more
|
||||||
|
than 20 named groups (Bugzilla #2613).
|
||||||
|
|
||||||
|
3. Fixed some typos in code and documentation.
|
||||||
|
|
||||||
|
4. Fixed a small (*MARK) bug in the interpreter (Bugzilla #2771).
|
||||||
|
|
||||||
|
|
||||||
Version 8.44 12-February-2020
|
Version 8.44 12-February-2020
|
||||||
|
10
pcre/LICENCE
10
pcre/LICENCE
@ -19,13 +19,13 @@ THE BASIC LIBRARY FUNCTIONS
|
|||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
Written by: Philip Hazel
|
Written by: Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: Philip.Hazel
|
||||||
Email domain: cam.ac.uk
|
Email domain: gmail.com
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2020 University of Cambridge
|
Copyright (c) 1997-2021 University of Cambridge
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
@ -36,7 +36,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
@ -47,7 +47,7 @@ Written by: Zoltan Herczeg
|
|||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
10
pcre/NEWS
10
pcre/NEWS
@ -1,8 +1,14 @@
|
|||||||
News about PCRE releases
|
News about PCRE releases
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
Note that this library (now called PCRE1) is now being maintained for bug fixes
|
Note that this library (now called PCRE1) is no longer being maintained. New
|
||||||
only. New projects are advised to use the new PCRE2 libraries.
|
projects are advised to use the PCRE2 libraries.
|
||||||
|
|
||||||
|
|
||||||
|
Release 8.45 15-June-2021
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
This is the final PCRE1 release. A very few small issues have been fixed.
|
||||||
|
|
||||||
|
|
||||||
Release 8.44 12-February-2020
|
Release 8.44 12-February-2020
|
||||||
|
@ -5,8 +5,8 @@ NOTE: This document relates to PCRE releases that use the original API, with
|
|||||||
library names libpcre, libpcre16, and libpcre32. January 2015 saw the first
|
library names libpcre, libpcre16, and libpcre32. January 2015 saw the first
|
||||||
release of a new API, known as PCRE2, with release numbers starting at 10.00
|
release of a new API, known as PCRE2, with release numbers starting at 10.00
|
||||||
and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries
|
and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries
|
||||||
(now called PCRE1) are still being maintained for bug fixes, but there will be
|
(now called PCRE1) are now at end of life, and 8.45 is the final release. New
|
||||||
no new development. New projects are advised to use the new PCRE2 libraries.
|
projects are advised to use the new PCRE2 libraries.
|
||||||
|
|
||||||
|
|
||||||
This document contains the following sections:
|
This document contains the following sections:
|
||||||
@ -768,6 +768,6 @@ Everything, source and executable, is in EBCDIC and native z/OS file formats.
|
|||||||
However, this software is not maintained and will not be upgraded. If you are
|
However, this software is not maintained and will not be upgraded. If you are
|
||||||
new to PCRE you should be looking at PCRE2 (version 10.30 or later).
|
new to PCRE you should be looking at PCRE2 (version 10.30 or later).
|
||||||
|
|
||||||
===============================
|
==========================
|
||||||
Last Updated: 13 September 2017
|
Last Updated: 15 June 2021
|
||||||
===============================
|
==========================
|
||||||
|
11
pcre/README
11
pcre/README
@ -5,9 +5,8 @@ NOTE: This set of files relates to PCRE releases that use the original API,
|
|||||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||||
first release of a new API, known as PCRE2, with release numbers starting at
|
first release of a new API, known as PCRE2, with release numbers starting at
|
||||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
||||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
libraries (now called PCRE1) are now at end of life, and 8.45 is the final
|
||||||
there will be no new development. New projects are advised to use the new PCRE2
|
release. New projects are advised to use the new PCRE2 libraries.
|
||||||
libraries.
|
|
||||||
|
|
||||||
|
|
||||||
The latest release of PCRE1 is always available in three alternative formats
|
The latest release of PCRE1 is always available in three alternative formats
|
||||||
@ -998,6 +997,6 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
|||||||
RunTest.bat a script for running tests under Windows
|
RunTest.bat a script for running tests under Windows
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: Philip.Hazel
|
||||||
Email domain: cam.ac.uk
|
Email domain: gmail.com
|
||||||
Last updated: 12 February 2020
|
Last updated: 15 June 2021
|
||||||
|
@ -9,17 +9,17 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
|||||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
|
|
||||||
m4_define(pcre_major, [8])
|
m4_define(pcre_major, [8])
|
||||||
m4_define(pcre_minor, [44])
|
m4_define(pcre_minor, [45])
|
||||||
m4_define(pcre_prerelease, [])
|
m4_define(pcre_prerelease, [])
|
||||||
m4_define(pcre_date, [2020-02-12])
|
m4_define(pcre_date, [2021-06-15])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
||||||
# Libtool shared library interface versions (current:revision:age)
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
m4_define(libpcre_version, [3:12:2])
|
m4_define(libpcre_version, [3:13:2])
|
||||||
m4_define(libpcre16_version, [2:12:2])
|
m4_define(libpcre16_version, [2:13:2])
|
||||||
m4_define(libpcre32_version, [0:12:0])
|
m4_define(libpcre32_version, [0:13:0])
|
||||||
m4_define(libpcreposix_version, [0:7:0])
|
m4_define(libpcreposix_version, [0:7:0])
|
||||||
m4_define(libpcrecpp_version, [0:2:0])
|
m4_define(libpcrecpp_version, [0:2:0])
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@ NOTE: This document relates to PCRE releases that use the original API, with
|
|||||||
library names libpcre, libpcre16, and libpcre32. January 2015 saw the first
|
library names libpcre, libpcre16, and libpcre32. January 2015 saw the first
|
||||||
release of a new API, known as PCRE2, with release numbers starting at 10.00
|
release of a new API, known as PCRE2, with release numbers starting at 10.00
|
||||||
and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries
|
and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries
|
||||||
(now called PCRE1) are still being maintained for bug fixes, but there will be
|
(now called PCRE1) are now at end of life, and 8.45 is the final release. New
|
||||||
no new development. New projects are advised to use the new PCRE2 libraries.
|
projects are advised to use the new PCRE2 libraries.
|
||||||
|
|
||||||
|
|
||||||
This document contains the following sections:
|
This document contains the following sections:
|
||||||
@ -768,6 +768,6 @@ Everything, source and executable, is in EBCDIC and native z/OS file formats.
|
|||||||
However, this software is not maintained and will not be upgraded. If you are
|
However, this software is not maintained and will not be upgraded. If you are
|
||||||
new to PCRE you should be looking at PCRE2 (version 10.30 or later).
|
new to PCRE you should be looking at PCRE2 (version 10.30 or later).
|
||||||
|
|
||||||
===============================
|
==========================
|
||||||
Last Updated: 13 September 2017
|
Last Updated: 15 June 2021
|
||||||
===============================
|
==========================
|
||||||
|
@ -5,9 +5,8 @@ NOTE: This set of files relates to PCRE releases that use the original API,
|
|||||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||||
first release of a new API, known as PCRE2, with release numbers starting at
|
first release of a new API, known as PCRE2, with release numbers starting at
|
||||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
||||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
libraries (now called PCRE1) are now at end of life, and 8.45 is the final
|
||||||
there will be no new development. New projects are advised to use the new PCRE2
|
release. New projects are advised to use the new PCRE2 libraries.
|
||||||
libraries.
|
|
||||||
|
|
||||||
|
|
||||||
The latest release of PCRE1 is always available in three alternative formats
|
The latest release of PCRE1 is always available in three alternative formats
|
||||||
@ -998,6 +997,6 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
|||||||
RunTest.bat a script for running tests under Windows
|
RunTest.bat a script for running tests under Windows
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: Philip.Hazel
|
||||||
Email domain: cam.ac.uk
|
Email domain: gmail.com
|
||||||
Last updated: 12 February 2020
|
Last updated: 15 June 2021
|
||||||
|
@ -22,13 +22,12 @@ man page, in case the conversion went wrong.
|
|||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PLEASE TAKE NOTE</a><br>
|
<br><a name="SEC1" href="#TOC1">PLEASE TAKE NOTE</a><br>
|
||||||
<P>
|
<P>
|
||||||
This document relates to PCRE releases that use the original API,
|
This document relates to PCRE releases that use the original API, with library
|
||||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
names libpcre, libpcre16, and libpcre32. January 2015 saw the first release of
|
||||||
first release of a new API, known as PCRE2, with release numbers starting at
|
a new API, known as PCRE2, with release numbers starting at 10.00 and library
|
||||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries (now called
|
||||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
PCRE1) are now at end of life, and 8.45 is the final release. New projects are
|
||||||
there will be no new development. New projects are advised to use the new PCRE2
|
advised to use the new PCRE2 libraries.
|
||||||
libraries.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">INTRODUCTION</a><br>
|
<br><a name="SEC2" href="#TOC1">INTRODUCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
@ -215,9 +214,9 @@ two digits 10, at the domain cam.ac.uk.
|
|||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 10 February 2015
|
Last updated: 14 June 2021
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2021 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE index page</a>.
|
Return to the <a href="index.html">PCRE index page</a>.
|
||||||
|
@ -1718,7 +1718,7 @@ very long time, and so the <i>match_limit</i> value is also used in this case
|
|||||||
<P>
|
<P>
|
||||||
The default value for the limit can be set when PCRE is built; the default
|
The default value for the limit can be set when PCRE is built; the default
|
||||||
default is 10 million, which handles all but the most extreme cases. You can
|
default is 10 million, which handles all but the most extreme cases. You can
|
||||||
override the default by suppling <b>pcre_exec()</b> with a <b>pcre_extra</b>
|
override the default by supplying <b>pcre_exec()</b> with a <b>pcre_extra</b>
|
||||||
block in which <i>match_limit</i> is set, and PCRE_EXTRA_MATCH_LIMIT is set in
|
block in which <i>match_limit</i> is set, and PCRE_EXTRA_MATCH_LIMIT is set in
|
||||||
the <i>flags</i> field. If the limit is exceeded, <b>pcre_exec()</b> returns
|
the <i>flags</i> field. If the limit is exceeded, <b>pcre_exec()</b> returns
|
||||||
PCRE_ERROR_MATCHLIMIT.
|
PCRE_ERROR_MATCHLIMIT.
|
||||||
@ -1749,7 +1749,7 @@ and is ignored, when matching is done using JIT compiled code.
|
|||||||
<P>
|
<P>
|
||||||
The default value for <i>match_limit_recursion</i> can be set when PCRE is
|
The default value for <i>match_limit_recursion</i> can be set when PCRE is
|
||||||
built; the default default is the same value as the default for
|
built; the default default is the same value as the default for
|
||||||
<i>match_limit</i>. You can override the default by suppling <b>pcre_exec()</b>
|
<i>match_limit</i>. You can override the default by supplying <b>pcre_exec()</b>
|
||||||
with a <b>pcre_extra</b> block in which <i>match_limit_recursion</i> is set, and
|
with a <b>pcre_extra</b> block in which <i>match_limit_recursion</i> is set, and
|
||||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the <i>flags</i> field. If the limit
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the <i>flags</i> field. If the limit
|
||||||
is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_RECURSIONLIMIT.
|
is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_RECURSIONLIMIT.
|
||||||
@ -2063,10 +2063,10 @@ lookbehind. For example, consider the pattern
|
|||||||
</pre>
|
</pre>
|
||||||
which finds occurrences of "iss" in the middle of words. (\B matches only if
|
which finds occurrences of "iss" in the middle of words. (\B matches only if
|
||||||
the current position in the subject is not a word boundary.) When applied to
|
the current position in the subject is not a word boundary.) When applied to
|
||||||
the string "Mississipi" the first call to <b>pcre_exec()</b> finds the first
|
the string "Mississippi" the first call to <b>pcre_exec()</b> finds the first
|
||||||
occurrence. If <b>pcre_exec()</b> is called again with just the remainder of the
|
occurrence. If <b>pcre_exec()</b> is called again with just the remainder of the
|
||||||
subject, namely "issipi", it does not match, because \B is always false at the
|
subject, namely "issippi", it does not match, because \B is always false at
|
||||||
start of the subject, which is deemed to be a word boundary. However, if
|
the start of the subject, which is deemed to be a word boundary. However, if
|
||||||
<b>pcre_exec()</b> is passed the entire string again, but with <i>startoffset</i>
|
<b>pcre_exec()</b> is passed the entire string again, but with <i>startoffset</i>
|
||||||
set to 4, it finds the second occurrence of "iss" because it is able to look
|
set to 4, it finds the second occurrence of "iss" because it is able to look
|
||||||
behind the starting point to discover that it is preceded by a letter.
|
behind the starting point to discover that it is preceded by a letter.
|
||||||
|
@ -161,7 +161,7 @@ if (rc < 0)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeded */
|
/* Match succeeded */
|
||||||
|
|
||||||
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
||||||
|
|
||||||
@ -379,7 +379,7 @@ for (;;)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeded */
|
/* Match succeeded */
|
||||||
|
|
||||||
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
||||||
|
|
||||||
|
@ -299,7 +299,7 @@ recommended.
|
|||||||
This is a suggestion for how a multithreaded program that needs to set up
|
This is a suggestion for how a multithreaded program that needs to set up
|
||||||
non-default JIT stacks might operate:
|
non-default JIT stacks might operate:
|
||||||
<pre>
|
<pre>
|
||||||
During thread initalization
|
During thread initialization
|
||||||
thread_local_var = pcre_jit_stack_alloc(...)
|
thread_local_var = pcre_jit_stack_alloc(...)
|
||||||
|
|
||||||
During thread exit
|
During thread exit
|
||||||
|
@ -655,7 +655,7 @@ recognized.
|
|||||||
<P>
|
<P>
|
||||||
It is possible to restrict \R to match only CR, LF, or CRLF (instead of the
|
It is possible to restrict \R to match only CR, LF, or CRLF (instead of the
|
||||||
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
|
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
|
||||||
either at compile time or when the pattern is matched. (BSR is an abbrevation
|
either at compile time or when the pattern is matched. (BSR is an abbreviation
|
||||||
for "backslash R".) This can be made the default when PCRE is built; if this is
|
for "backslash R".) This can be made the default when PCRE is built; if this is
|
||||||
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
|
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
|
||||||
It is also possible to specify these settings by starting a pattern string with
|
It is also possible to specify these settings by starting a pattern string with
|
||||||
@ -972,7 +972,7 @@ additional characters according to the following rules for ending a cluster:
|
|||||||
3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
|
3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
|
||||||
are of five types: L, V, T, LV, and LVT. An L character may be followed by an
|
are of five types: L, V, T, LV, and LVT. An L character may be followed by an
|
||||||
L, V, LV, or LVT character; an LV or V character may be followed by a V or T
|
L, V, LV, or LVT character; an LV or V character may be followed by a V or T
|
||||||
character; an LVT or T character may be follwed only by a T character.
|
character; an LVT or T character may be followed only by a T character.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
4. Do not end before extending characters or spacing marks. Characters with
|
4. Do not end before extending characters or spacing marks. Characters with
|
||||||
@ -3080,7 +3080,7 @@ successful match. Consider:
|
|||||||
</pre>
|
</pre>
|
||||||
If the subject is "aaaac...", after the first match attempt fails (starting at
|
If the subject is "aaaac...", after the first match attempt fails (starting at
|
||||||
the first character in the string), the starting point skips on to start the
|
the first character in the string), the starting point skips on to start the
|
||||||
next attempt at "c". Note that a possessive quantifer does not have the same
|
next attempt at "c". Note that a possessive quantifier does not have the same
|
||||||
effect as this example; although it would suppress backtracking during the
|
effect as this example; although it would suppress backtracking during the
|
||||||
first match attempt, the second attempt would start at the second character
|
first match attempt, the second attempt would start at the second character
|
||||||
instead of skipping on to "c".
|
instead of skipping on to "c".
|
||||||
|
@ -157,7 +157,7 @@ is changed to use Unicode properties and they match many more characters.
|
|||||||
Xan Alphanumeric: union of properties L and N
|
Xan Alphanumeric: union of properties L and N
|
||||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||||
Xuc Univerally-named character: one that can be
|
Xuc Universally-named character: one that can be
|
||||||
represented by a Universal Character Name
|
represented by a Universal Character Name
|
||||||
Xwd Perl word: property Xan or underscore
|
Xwd Perl word: property Xan or underscore
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1,16 +1,15 @@
|
|||||||
.TH PCRE 3 "10 February 2015" "PCRE 8.37"
|
.TH PCRE 3 "14 June 2021" "PCRE 8.45"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions (original API)
|
PCRE - Perl-compatible regular expressions (original API)
|
||||||
.SH "PLEASE TAKE NOTE"
|
.SH "PLEASE TAKE NOTE"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This document relates to PCRE releases that use the original API,
|
This document relates to PCRE releases that use the original API, with library
|
||||||
with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
names libpcre, libpcre16, and libpcre32. January 2015 saw the first release of
|
||||||
first release of a new API, known as PCRE2, with release numbers starting at
|
a new API, known as PCRE2, with release numbers starting at 10.00 and library
|
||||||
10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
|
names libpcre2-8, libpcre2-16, and libpcre2-32. The old libraries (now called
|
||||||
libraries (now called PCRE1) are still being maintained for bug fixes, but
|
PCRE1) are now at end of life, and 8.45 is the final release. New projects are
|
||||||
there will be no new development. New projects are advised to use the new PCRE2
|
advised to use the new PCRE2 libraries.
|
||||||
libraries.
|
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH INTRODUCTION
|
.SH INTRODUCTION
|
||||||
@ -225,6 +224,6 @@ two digits 10, at the domain cam.ac.uk.
|
|||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 10 February 2015
|
Last updated: 14 June 2021
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2021 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
@ -21,9 +21,9 @@ PLEASE TAKE NOTE
|
|||||||
library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
library names libpcre, libpcre16, and libpcre32. January 2015 saw the
|
||||||
first release of a new API, known as PCRE2, with release numbers start-
|
first release of a new API, known as PCRE2, with release numbers start-
|
||||||
ing at 10.00 and library names libpcre2-8, libpcre2-16, and
|
ing at 10.00 and library names libpcre2-8, libpcre2-16, and
|
||||||
libpcre2-32. The old libraries (now called PCRE1) are still being main-
|
libpcre2-32. The old libraries (now called PCRE1) are now at end of
|
||||||
tained for bug fixes, but there will be no new development. New
|
life, and 8.45 is the final release. New projects are advised to use
|
||||||
projects are advised to use the new PCRE2 libraries.
|
the new PCRE2 libraries.
|
||||||
|
|
||||||
|
|
||||||
INTRODUCTION
|
INTRODUCTION
|
||||||
@ -190,8 +190,8 @@ AUTHOR
|
|||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 10 February 2015
|
Last updated: 14 June 2021
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2021 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@ -3155,7 +3155,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
|||||||
|
|
||||||
The default value for the limit can be set when PCRE is built; the de-
|
The default value for the limit can be set when PCRE is built; the de-
|
||||||
fault default is 10 million, which handles all but the most extreme
|
fault default is 10 million, which handles all but the most extreme
|
||||||
cases. You can override the default by suppling pcre_exec() with a
|
cases. You can override the default by supplying pcre_exec() with a
|
||||||
pcre_extra block in which match_limit is set, and PCRE_EX-
|
pcre_extra block in which match_limit is set, and PCRE_EX-
|
||||||
TRA_MATCH_LIMIT is set in the flags field. If the limit is exceeded,
|
TRA_MATCH_LIMIT is set in the flags field. If the limit is exceeded,
|
||||||
pcre_exec() returns PCRE_ERROR_MATCHLIMIT.
|
pcre_exec() returns PCRE_ERROR_MATCHLIMIT.
|
||||||
@ -3183,7 +3183,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
|||||||
|
|
||||||
The default value for match_limit_recursion can be set when PCRE is
|
The default value for match_limit_recursion can be set when PCRE is
|
||||||
built; the default default is the same value as the default for
|
built; the default default is the same value as the default for
|
||||||
match_limit. You can override the default by suppling pcre_exec() with
|
match_limit. You can override the default by supplying pcre_exec() with
|
||||||
a pcre_extra block in which match_limit_recursion is set, and PCRE_EX-
|
a pcre_extra block in which match_limit_recursion is set, and PCRE_EX-
|
||||||
TRA_MATCH_LIMIT_RECURSION is set in the flags field. If the limit is
|
TRA_MATCH_LIMIT_RECURSION is set in the flags field. If the limit is
|
||||||
exceeded, pcre_exec() returns PCRE_ERROR_RECURSIONLIMIT.
|
exceeded, pcre_exec() returns PCRE_ERROR_RECURSIONLIMIT.
|
||||||
@ -3475,9 +3475,9 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
|||||||
|
|
||||||
which finds occurrences of "iss" in the middle of words. (\B matches
|
which finds occurrences of "iss" in the middle of words. (\B matches
|
||||||
only if the current position in the subject is not a word boundary.)
|
only if the current position in the subject is not a word boundary.)
|
||||||
When applied to the string "Mississipi" the first call to pcre_exec()
|
When applied to the string "Mississippi" the first call to pcre_exec()
|
||||||
finds the first occurrence. If pcre_exec() is called again with just
|
finds the first occurrence. If pcre_exec() is called again with just
|
||||||
the remainder of the subject, namely "issipi", it does not match, be-
|
the remainder of the subject, namely "issippi", it does not match, be-
|
||||||
cause \B is always false at the start of the subject, which is deemed
|
cause \B is always false at the start of the subject, which is deemed
|
||||||
to be a word boundary. However, if pcre_exec() is passed the entire
|
to be a word boundary. However, if pcre_exec() is passed the entire
|
||||||
string again, but with startoffset set to 4, it finds the second occur-
|
string again, but with startoffset set to 4, it finds the second occur-
|
||||||
@ -5292,11 +5292,11 @@ BACKSLASH
|
|||||||
It is possible to restrict \R to match only CR, LF, or CRLF (instead of
|
It is possible to restrict \R to match only CR, LF, or CRLF (instead of
|
||||||
the complete set of Unicode line endings) by setting the option
|
the complete set of Unicode line endings) by setting the option
|
||||||
PCRE_BSR_ANYCRLF either at compile time or when the pattern is matched.
|
PCRE_BSR_ANYCRLF either at compile time or when the pattern is matched.
|
||||||
(BSR is an abbrevation for "backslash R".) This can be made the default
|
(BSR is an abbreviation for "backslash R".) This can be made the de-
|
||||||
when PCRE is built; if this is the case, the other behaviour can be re-
|
fault when PCRE is built; if this is the case, the other behaviour can
|
||||||
quested via the PCRE_BSR_UNICODE option. It is also possible to spec-
|
be requested via the PCRE_BSR_UNICODE option. It is also possible to
|
||||||
ify these settings by starting a pattern string with one of the follow-
|
specify these settings by starting a pattern string with one of the
|
||||||
ing sequences:
|
following sequences:
|
||||||
|
|
||||||
(*BSR_ANYCRLF) CR, LF, or CRLF only
|
(*BSR_ANYCRLF) CR, LF, or CRLF only
|
||||||
(*BSR_UNICODE) any Unicode newline sequence
|
(*BSR_UNICODE) any Unicode newline sequence
|
||||||
@ -5488,8 +5488,8 @@ BACKSLASH
|
|||||||
3. Do not break Hangul (a Korean script) syllable sequences. Hangul
|
3. Do not break Hangul (a Korean script) syllable sequences. Hangul
|
||||||
characters are of five types: L, V, T, LV, and LVT. An L character may
|
characters are of five types: L, V, T, LV, and LVT. An L character may
|
||||||
be followed by an L, V, LV, or LVT character; an LV or V character may
|
be followed by an L, V, LV, or LVT character; an LV or V character may
|
||||||
be followed by a V or T character; an LVT or T character may be follwed
|
be followed by a V or T character; an LVT or T character may be fol-
|
||||||
only by a T character.
|
lowed only by a T character.
|
||||||
|
|
||||||
4. Do not end before extending characters or spacing marks. Characters
|
4. Do not end before extending characters or spacing marks. Characters
|
||||||
with the "mark" property always have the "extend" grapheme breaking
|
with the "mark" property always have the "extend" grapheme breaking
|
||||||
@ -7494,7 +7494,7 @@ BACKTRACKING CONTROL
|
|||||||
If the subject is "aaaac...", after the first match attempt fails
|
If the subject is "aaaac...", after the first match attempt fails
|
||||||
(starting at the first character in the string), the starting point
|
(starting at the first character in the string), the starting point
|
||||||
skips on to start the next attempt at "c". Note that a possessive quan-
|
skips on to start the next attempt at "c". Note that a possessive quan-
|
||||||
tifer does not have the same effect as this example; although it would
|
tifier does not have the same effect as this example; although it would
|
||||||
suppress backtracking during the first match attempt, the second at-
|
suppress backtracking during the first match attempt, the second at-
|
||||||
tempt would start at the second character instead of skipping on to
|
tempt would start at the second character instead of skipping on to
|
||||||
"c".
|
"c".
|
||||||
@ -7793,7 +7793,7 @@ PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P
|
|||||||
Xan Alphanumeric: union of properties L and N
|
Xan Alphanumeric: union of properties L and N
|
||||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||||
Xuc Univerally-named character: one that can be
|
Xuc Universally-named character: one that can be
|
||||||
represented by a Universal Character Name
|
represented by a Universal Character Name
|
||||||
Xwd Perl word: property Xan or underscore
|
Xwd Perl word: property Xan or underscore
|
||||||
|
|
||||||
@ -8571,7 +8571,7 @@ CONTROLLING THE JIT STACK
|
|||||||
This is a suggestion for how a multithreaded program that needs to set
|
This is a suggestion for how a multithreaded program that needs to set
|
||||||
up non-default JIT stacks might operate:
|
up non-default JIT stacks might operate:
|
||||||
|
|
||||||
During thread initalization
|
During thread initialization
|
||||||
thread_local_var = pcre_jit_stack_alloc(...)
|
thread_local_var = pcre_jit_stack_alloc(...)
|
||||||
|
|
||||||
During thread exit
|
During thread exit
|
||||||
|
@ -1699,7 +1699,7 @@ very long time, and so the \fImatch_limit\fP value is also used in this case
|
|||||||
.P
|
.P
|
||||||
The default value for the limit can be set when PCRE is built; the default
|
The default value for the limit can be set when PCRE is built; the default
|
||||||
default is 10 million, which handles all but the most extreme cases. You can
|
default is 10 million, which handles all but the most extreme cases. You can
|
||||||
override the default by suppling \fBpcre_exec()\fP with a \fBpcre_extra\fP
|
override the default by supplying \fBpcre_exec()\fP with a \fBpcre_extra\fP
|
||||||
block in which \fImatch_limit\fP is set, and PCRE_EXTRA_MATCH_LIMIT is set in
|
block in which \fImatch_limit\fP is set, and PCRE_EXTRA_MATCH_LIMIT is set in
|
||||||
the \fIflags\fP field. If the limit is exceeded, \fBpcre_exec()\fP returns
|
the \fIflags\fP field. If the limit is exceeded, \fBpcre_exec()\fP returns
|
||||||
PCRE_ERROR_MATCHLIMIT.
|
PCRE_ERROR_MATCHLIMIT.
|
||||||
@ -1726,7 +1726,7 @@ and is ignored, when matching is done using JIT compiled code.
|
|||||||
.P
|
.P
|
||||||
The default value for \fImatch_limit_recursion\fP can be set when PCRE is
|
The default value for \fImatch_limit_recursion\fP can be set when PCRE is
|
||||||
built; the default default is the same value as the default for
|
built; the default default is the same value as the default for
|
||||||
\fImatch_limit\fP. You can override the default by suppling \fBpcre_exec()\fP
|
\fImatch_limit\fP. You can override the default by supplying \fBpcre_exec()\fP
|
||||||
with a \fBpcre_extra\fP block in which \fImatch_limit_recursion\fP is set, and
|
with a \fBpcre_extra\fP block in which \fImatch_limit_recursion\fP is set, and
|
||||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the \fIflags\fP field. If the limit
|
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the \fIflags\fP field. If the limit
|
||||||
is exceeded, \fBpcre_exec()\fP returns PCRE_ERROR_RECURSIONLIMIT.
|
is exceeded, \fBpcre_exec()\fP returns PCRE_ERROR_RECURSIONLIMIT.
|
||||||
@ -2044,10 +2044,10 @@ lookbehind. For example, consider the pattern
|
|||||||
.sp
|
.sp
|
||||||
which finds occurrences of "iss" in the middle of words. (\eB matches only if
|
which finds occurrences of "iss" in the middle of words. (\eB matches only if
|
||||||
the current position in the subject is not a word boundary.) When applied to
|
the current position in the subject is not a word boundary.) When applied to
|
||||||
the string "Mississipi" the first call to \fBpcre_exec()\fP finds the first
|
the string "Mississippi" the first call to \fBpcre_exec()\fP finds the first
|
||||||
occurrence. If \fBpcre_exec()\fP is called again with just the remainder of the
|
occurrence. If \fBpcre_exec()\fP is called again with just the remainder of the
|
||||||
subject, namely "issipi", it does not match, because \eB is always false at the
|
subject, namely "issippi", it does not match, because \eB is always false at
|
||||||
start of the subject, which is deemed to be a word boundary. However, if
|
the start of the subject, which is deemed to be a word boundary. However, if
|
||||||
\fBpcre_exec()\fP is passed the entire string again, but with \fIstartoffset\fP
|
\fBpcre_exec()\fP is passed the entire string again, but with \fIstartoffset\fP
|
||||||
set to 4, it finds the second occurrence of "iss" because it is able to look
|
set to 4, it finds the second occurrence of "iss" because it is able to look
|
||||||
behind the starting point to discover that it is preceded by a letter.
|
behind the starting point to discover that it is preceded by a letter.
|
||||||
|
@ -161,7 +161,7 @@ if (rc < 0)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeded */
|
/* Match succeeded */
|
||||||
|
|
||||||
printf("\enMatch succeeded at offset %d\en", ovector[0]);
|
printf("\enMatch succeeded at offset %d\en", ovector[0]);
|
||||||
|
|
||||||
@ -379,7 +379,7 @@ for (;;)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeded */
|
/* Match succeeded */
|
||||||
|
|
||||||
printf("\enMatch succeeded again at offset %d\en", ovector[0]);
|
printf("\enMatch succeeded again at offset %d\en", ovector[0]);
|
||||||
|
|
||||||
|
@ -285,7 +285,7 @@ recommended.
|
|||||||
This is a suggestion for how a multithreaded program that needs to set up
|
This is a suggestion for how a multithreaded program that needs to set up
|
||||||
non-default JIT stacks might operate:
|
non-default JIT stacks might operate:
|
||||||
.sp
|
.sp
|
||||||
During thread initalization
|
During thread initialization
|
||||||
thread_local_var = pcre_jit_stack_alloc(...)
|
thread_local_var = pcre_jit_stack_alloc(...)
|
||||||
.sp
|
.sp
|
||||||
During thread exit
|
During thread exit
|
||||||
|
@ -656,7 +656,7 @@ recognized.
|
|||||||
.P
|
.P
|
||||||
It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the
|
It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the
|
||||||
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
|
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
|
||||||
either at compile time or when the pattern is matched. (BSR is an abbrevation
|
either at compile time or when the pattern is matched. (BSR is an abbreviation
|
||||||
for "backslash R".) This can be made the default when PCRE is built; if this is
|
for "backslash R".) This can be made the default when PCRE is built; if this is
|
||||||
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
|
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
|
||||||
It is also possible to specify these settings by starting a pattern string with
|
It is also possible to specify these settings by starting a pattern string with
|
||||||
@ -968,7 +968,7 @@ additional characters according to the following rules for ending a cluster:
|
|||||||
3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
|
3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters
|
||||||
are of five types: L, V, T, LV, and LVT. An L character may be followed by an
|
are of five types: L, V, T, LV, and LVT. An L character may be followed by an
|
||||||
L, V, LV, or LVT character; an LV or V character may be followed by a V or T
|
L, V, LV, or LVT character; an LV or V character may be followed by a V or T
|
||||||
character; an LVT or T character may be follwed only by a T character.
|
character; an LVT or T character may be followed only by a T character.
|
||||||
.P
|
.P
|
||||||
4. Do not end before extending characters or spacing marks. Characters with
|
4. Do not end before extending characters or spacing marks. Characters with
|
||||||
the "mark" property always have the "extend" grapheme breaking property.
|
the "mark" property always have the "extend" grapheme breaking property.
|
||||||
@ -3115,7 +3115,7 @@ successful match. Consider:
|
|||||||
.sp
|
.sp
|
||||||
If the subject is "aaaac...", after the first match attempt fails (starting at
|
If the subject is "aaaac...", after the first match attempt fails (starting at
|
||||||
the first character in the string), the starting point skips on to start the
|
the first character in the string), the starting point skips on to start the
|
||||||
next attempt at "c". Note that a possessive quantifer does not have the same
|
next attempt at "c". Note that a possessive quantifier does not have the same
|
||||||
effect as this example; although it would suppress backtracking during the
|
effect as this example; although it would suppress backtracking during the
|
||||||
first match attempt, the second attempt would start at the second character
|
first match attempt, the second attempt would start at the second character
|
||||||
instead of skipping on to "c".
|
instead of skipping on to "c".
|
||||||
|
@ -123,7 +123,7 @@ is changed to use Unicode properties and they match many more characters.
|
|||||||
Xan Alphanumeric: union of properties L and N
|
Xan Alphanumeric: union of properties L and N
|
||||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||||
Xuc Univerally-named character: one that can be
|
Xuc Universally-named character: one that can be
|
||||||
represented by a Universal Character Name
|
represented by a Universal Character Name
|
||||||
Xwd Perl word: property Xan or underscore
|
Xwd Perl word: property Xan or underscore
|
||||||
.sp
|
.sp
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2020 University of Cambridge
|
Copyright (c) 1997-2021 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -9102,6 +9102,8 @@ pcre_uchar cworkspace[COMPILE_WORK_SIZE];
|
|||||||
similar way to cworkspace, it can be expanded using malloc() if necessary. */
|
similar way to cworkspace, it can be expanded using malloc() if necessary. */
|
||||||
|
|
||||||
named_group named_groups[NAMED_GROUP_LIST_SIZE];
|
named_group named_groups[NAMED_GROUP_LIST_SIZE];
|
||||||
|
cd->named_groups = named_groups;
|
||||||
|
cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
|
||||||
|
|
||||||
/* Set this early so that early errors get offset 0. */
|
/* Set this early so that early errors get offset 0. */
|
||||||
|
|
||||||
@ -9375,8 +9377,6 @@ cd->hwm = cworkspace;
|
|||||||
cd->iscondassert = FALSE;
|
cd->iscondassert = FALSE;
|
||||||
cd->start_workspace = cworkspace;
|
cd->start_workspace = cworkspace;
|
||||||
cd->workspace_size = COMPILE_WORK_SIZE;
|
cd->workspace_size = COMPILE_WORK_SIZE;
|
||||||
cd->named_groups = named_groups;
|
|
||||||
cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
|
|
||||||
cd->start_pattern = (const pcre_uchar *)pattern;
|
cd->start_pattern = (const pcre_uchar *)pattern;
|
||||||
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
|
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
|
||||||
cd->req_varyopt = 0;
|
cd->req_varyopt = 0;
|
||||||
@ -9487,6 +9487,7 @@ if (cd->names_found > 0)
|
|||||||
add_name(cd, ng->name, ng->length, ng->number);
|
add_name(cd, ng->name, ng->length, ng->number);
|
||||||
if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
|
if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
|
||||||
(PUBL(free))((void *)cd->named_groups);
|
(PUBL(free))((void *)cd->named_groups);
|
||||||
|
cd->named_group_list_size = 0; /* So we don't free it twice */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set up a starting, non-extracting bracket, then compile the expression. On
|
/* Set up a starting, non-extracting bracket, then compile the expression. On
|
||||||
@ -9637,6 +9638,8 @@ if (errorcode != 0)
|
|||||||
{
|
{
|
||||||
(PUBL(free))(re);
|
(PUBL(free))(re);
|
||||||
PCRE_EARLY_ERROR_RETURN:
|
PCRE_EARLY_ERROR_RETURN:
|
||||||
|
if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
|
||||||
|
(PUBL(free))((void *)cd->named_groups);
|
||||||
*erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
|
*erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
|
||||||
PCRE_EARLY_ERROR_RETURN2:
|
PCRE_EARLY_ERROR_RETURN2:
|
||||||
*errorptr = find_error_text(errorcode);
|
*errorptr = find_error_text(errorcode);
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2018 University of Cambridge
|
Copyright (c) 1997-2021 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -764,7 +764,7 @@ for (;;)
|
|||||||
md->mark = NULL; /* In case previously set by assertion */
|
md->mark = NULL; /* In case previously set by assertion */
|
||||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
|
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
|
||||||
eptrb, RM55);
|
eptrb, RM55);
|
||||||
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
|
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT || rrc == MATCH_KETRPOS) &&
|
||||||
md->mark == NULL) md->mark = ecode + 2;
|
md->mark == NULL) md->mark = ecode + 2;
|
||||||
|
|
||||||
/* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
|
/* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
|
||||||
|
@ -144,7 +144,7 @@ if (rc < 0)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeded */
|
/* Match succeeded */
|
||||||
|
|
||||||
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
||||||
|
|
||||||
@ -362,7 +362,7 @@ for (;;)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeded */
|
/* Match succeeded */
|
||||||
|
|
||||||
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
||||||
|
|
||||||
|
3
pcre/testdata/testinput1
vendored
3
pcre/testdata/testinput1
vendored
@ -5757,4 +5757,7 @@ AbcdCBefgBhiBqz
|
|||||||
/(?&word)* \. (?<word> \w+ )/xi
|
/(?&word)* \. (?<word> \w+ )/xi
|
||||||
pokus.hokus
|
pokus.hokus
|
||||||
|
|
||||||
|
/(?:A(*:X))*+/K
|
||||||
|
A
|
||||||
|
|
||||||
/-- End of testinput1 --/
|
/-- End of testinput1 --/
|
||||||
|
5
pcre/testdata/testoutput1
vendored
5
pcre/testdata/testoutput1
vendored
@ -9470,4 +9470,9 @@ No match
|
|||||||
0: pokus.hokus
|
0: pokus.hokus
|
||||||
1: hokus
|
1: hokus
|
||||||
|
|
||||||
|
/(?:A(*:X))*+/K
|
||||||
|
A
|
||||||
|
0: A
|
||||||
|
MK: X
|
||||||
|
|
||||||
/-- End of testinput1 --/
|
/-- End of testinput1 --/
|
||||||
|
Reference in New Issue
Block a user