1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

pcre-8.35.tar.bz2

This commit is contained in:
Sergei Golubchik
2014-06-05 13:44:44 +02:00
parent 8958f8b823
commit 8cc5973f1a
107 changed files with 21392 additions and 4049 deletions

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see
below for why this module is different).
Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -1473,7 +1473,7 @@ for (;;)
goto ANYNL01;
case CHAR_CR:
if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL01:
@ -1742,7 +1742,7 @@ for (;;)
goto ANYNL02;
case CHAR_CR:
if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL02:
@ -2012,7 +2012,7 @@ for (;;)
goto ANYNL03;
case CHAR_CR:
if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL03:
@ -2210,7 +2210,7 @@ for (;;)
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
@ -3466,7 +3466,7 @@ for (;;)
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
{
/* Advance to a known first char. */
/* Advance to a known first pcre_uchar (i.e. data item) */
if (has_first_char)
{
@ -3474,12 +3474,12 @@ for (;;)
{
pcre_uchar csc;
while (current_subject < end_subject &&
(csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
(csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
current_subject++;
}
else
while (current_subject < end_subject &&
RAWUCHARTEST(current_subject) != first_char)
UCHAR21TEST(current_subject) != first_char)
current_subject++;
}
@ -3509,36 +3509,26 @@ for (;;)
ANYCRLF, and we are now at a LF, advance the match position by one
more character. */
if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
current_subject < end_subject &&
RAWUCHARTEST(current_subject) == CHAR_NL)
UCHAR21TEST(current_subject) == CHAR_NL)
current_subject++;
}
}
/* Or to a non-unique first char after study */
/* Advance to a non-unique first pcre_uchar after study */
else if (start_bits != NULL)
{
while (current_subject < end_subject)
{
register pcre_uint32 c = RAWUCHARTEST(current_subject);
register pcre_uint32 c = UCHAR21TEST(current_subject);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
if ((start_bits[c/8] & (1 << (c&7))) == 0)
{
current_subject++;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* In non 8-bit mode, the iteration will stop for
characters > 255 at the beginning or not stop at all. */
if (utf)
ACROSSCHAR(current_subject < end_subject, *current_subject,
current_subject++);
#endif
}
else break;
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
current_subject++;
}
}
}
@ -3557,19 +3547,20 @@ for (;;)
/* If the pattern was studied, a minimum subject length may be set. This
is a lower bound; no actual string of that length may actually match the
pattern. Although the value is, strictly, in characters, we treat it as
bytes to avoid spending too much time in this optimization. */
in pcre_uchar units to avoid spending too much time in this optimization.
*/
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
(pcre_uint32)(end_subject - current_subject) < study->minlength)
return PCRE_ERROR_NOMATCH;
/* If req_char is set, we know that that character must appear in the
subject for the match to succeed. If the first character is set, req_char
must be later in the subject; otherwise the test starts at the match
point. This optimization can save a huge amount of work in patterns with
nested unlimited repeats that aren't going to match. Writing separate
code for cased/caseless versions makes it go faster, as does using an
autoincrement and backing off on a match.
/* If req_char is set, we know that that pcre_uchar must appear in the
subject for the match to succeed. If the first pcre_uchar is set,
req_char must be later in the subject; otherwise the test starts at the
match point. This optimization can save a huge amount of work in patterns
with nested unlimited repeats that aren't going to match. Writing
separate code for cased/caseless versions makes it go faster, as does
using an autoincrement and backing off on a match.
HOWEVER: when the subject string is very, very long, searching to its end
can take a long time, and give bad performance on quite ordinary
@ -3589,7 +3580,7 @@ for (;;)
{
while (p < end_subject)
{
register pcre_uint32 pp = RAWUCHARINCTEST(p);
register pcre_uint32 pp = UCHAR21INCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@ -3597,18 +3588,18 @@ for (;;)
{
while (p < end_subject)
{
if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
}
/* If we can't find the required character, break the matching loop,
/* If we can't find the required pcre_uchar, break the matching loop,
which will cause a return or PCRE_ERROR_NOMATCH. */
if (p >= end_subject) break;
/* If we have found the required character, save the point where we
/* If we have found the required pcre_uchar, save the point where we
found it, so that we don't search again next time round the loop if
the start hasn't passed this character yet. */
the start hasn't passed this point yet. */
req_char_ptr = p;
}
@ -3665,9 +3656,9 @@ for (;;)
not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. */
if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
current_subject < end_subject &&
RAWUCHARTEST(current_subject) == CHAR_NL &&
UCHAR21TEST(current_subject) == CHAR_NL &&
(re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY ||
md->nltype == NLTYPE_ANYCRLF ||