mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
8.39
This commit is contained in:
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2014 University of Cambridge
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -485,7 +485,7 @@ static const char error_texts[] =
|
||||
"lookbehind assertion is not fixed length\0"
|
||||
"malformed number or name after (?(\0"
|
||||
"conditional group contains more than two branches\0"
|
||||
"assertion expected after (?(\0"
|
||||
"assertion expected after (?( or (?(?C)\0"
|
||||
"(?R or (?[+-]digits must be followed by )\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
@ -560,6 +560,7 @@ static const char error_texts[] =
|
||||
/* 85 */
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"regular expression is too complicated\0"
|
||||
;
|
||||
|
||||
/* Table to identify digits and hex digits. This is used when compiling
|
||||
@ -4566,6 +4567,10 @@ for (;; ptr++)
|
||||
pcre_uint32 ec;
|
||||
pcre_uchar mcbuffer[8];
|
||||
|
||||
/* Come here to restart the loop without advancing the pointer. */
|
||||
|
||||
REDO_LOOP:
|
||||
|
||||
/* Get next character in the pattern */
|
||||
|
||||
c = *ptr;
|
||||
@ -4591,7 +4596,8 @@ for (;; ptr++)
|
||||
if (code > cd->start_workspace + cd->workspace_size -
|
||||
WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */
|
||||
{
|
||||
*errorcodeptr = ERR52;
|
||||
*errorcodeptr = (code >= cd->start_workspace + cd->workspace_size)?
|
||||
ERR52 : ERR87;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
@ -4645,9 +4651,10 @@ for (;; ptr++)
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* If in \Q...\E, check for the end; if not, we have a literal */
|
||||
/* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an
|
||||
isolated \E is ignored. */
|
||||
|
||||
if (inescq && c != CHAR_NULL)
|
||||
if (c != CHAR_NULL)
|
||||
{
|
||||
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
|
||||
{
|
||||
@ -4655,7 +4662,7 @@ for (;; ptr++)
|
||||
ptr++;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
else if (inescq)
|
||||
{
|
||||
if (previous_callout != NULL)
|
||||
{
|
||||
@ -4670,18 +4677,27 @@ for (;; ptr++)
|
||||
}
|
||||
goto NORMAL_CHAR;
|
||||
}
|
||||
/* Control does not reach here. */
|
||||
|
||||
/* Check for the start of a \Q...\E sequence. We must do this here rather
|
||||
than later in case it is immediately followed by \E, which turns it into a
|
||||
"do nothing" sequence. */
|
||||
|
||||
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
|
||||
{
|
||||
inescq = TRUE;
|
||||
ptr++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* In extended mode, skip white space and comments. We need a loop in order
|
||||
to check for more white space and more comments after a comment. */
|
||||
/* In extended mode, skip white space and comments. */
|
||||
|
||||
if ((options & PCRE_EXTENDED) != 0)
|
||||
{
|
||||
for (;;)
|
||||
const pcre_uchar *wscptr = ptr;
|
||||
while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
|
||||
if (c == CHAR_NUMBER_SIGN)
|
||||
{
|
||||
while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
|
||||
if (c != CHAR_NUMBER_SIGN) break;
|
||||
ptr++;
|
||||
while (*ptr != CHAR_NULL)
|
||||
{
|
||||
@ -4695,8 +4711,29 @@ for (;; ptr++)
|
||||
if (utf) FORWARDCHAR(ptr);
|
||||
#endif
|
||||
}
|
||||
c = *ptr; /* Either NULL or the char after a newline */
|
||||
}
|
||||
|
||||
/* If we skipped any characters, restart the loop. Otherwise, we didn't see
|
||||
a comment. */
|
||||
|
||||
if (ptr > wscptr) goto REDO_LOOP;
|
||||
}
|
||||
|
||||
/* Skip over (?# comments. We need to do this here because we want to know if
|
||||
the next thing is a quantifier, and these comments may come between an item
|
||||
and its quantifier. */
|
||||
|
||||
if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
|
||||
ptr[2] == CHAR_NUMBER_SIGN)
|
||||
{
|
||||
ptr += 3;
|
||||
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
|
||||
if (*ptr == CHAR_NULL)
|
||||
{
|
||||
*errorcodeptr = ERR18;
|
||||
goto FAILED;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* See if the next thing is a quantifier. */
|
||||
@ -4820,15 +4857,15 @@ for (;; ptr++)
|
||||
if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
|
||||
{
|
||||
nestptr = ptr + 7;
|
||||
ptr = sub_start_of_word - 1;
|
||||
continue;
|
||||
ptr = sub_start_of_word;
|
||||
goto REDO_LOOP;
|
||||
}
|
||||
|
||||
if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
|
||||
{
|
||||
nestptr = ptr + 7;
|
||||
ptr = sub_end_of_word - 1;
|
||||
continue;
|
||||
ptr = sub_end_of_word;
|
||||
goto REDO_LOOP;
|
||||
}
|
||||
|
||||
/* Handle a real character class. */
|
||||
@ -5046,20 +5083,22 @@ for (;; ptr++)
|
||||
ptr = tempptr + 1;
|
||||
continue;
|
||||
|
||||
/* For the other POSIX classes (ascii, xdigit) we are going to fall
|
||||
through to the non-UCP case and build a bit map for characters with
|
||||
code points less than 256. If we are in a negated POSIX class
|
||||
within a non-negated overall class, characters with code points
|
||||
greater than 255 must all match. In the special case where we have
|
||||
not yet generated any xclass data, and this is the final item in
|
||||
the overall class, we need do nothing: later on, the opcode
|
||||
/* For the other POSIX classes (ascii, cntrl, xdigit) we are going
|
||||
to fall through to the non-UCP case and build a bit map for
|
||||
characters with code points less than 256. If we are in a negated
|
||||
POSIX class, characters with code points greater than 255 must
|
||||
either all match or all not match. In the special case where we
|
||||
have not yet generated any xclass data, and this is the final item
|
||||
in the overall class, we need do nothing: later on, the opcode
|
||||
OP_NCLASS will be used to indicate that characters greater than 255
|
||||
are acceptable. If we have already seen an xclass item or one may
|
||||
follow (we have to assume that it might if this is not the end of
|
||||
the class), explicitly match all wide codepoints. */
|
||||
the class), explicitly list all wide codepoints, which will then
|
||||
either not match or match, depending on whether the class is or is
|
||||
not negated. */
|
||||
|
||||
default:
|
||||
if (!negate_class && local_negate &&
|
||||
if (local_negate &&
|
||||
(xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
|
||||
{
|
||||
*class_uchardata++ = XCL_RANGE;
|
||||
@ -6529,21 +6568,6 @@ for (;; ptr++)
|
||||
case CHAR_LEFT_PARENTHESIS:
|
||||
ptr++;
|
||||
|
||||
/* First deal with comments. Putting this code right at the start ensures
|
||||
that comments have no bad side effects. */
|
||||
|
||||
if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
|
||||
{
|
||||
ptr += 2;
|
||||
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
|
||||
if (*ptr == CHAR_NULL)
|
||||
{
|
||||
*errorcodeptr = ERR18;
|
||||
goto FAILED;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Now deal with various "verbs" that can be introduced by '*'. */
|
||||
|
||||
if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
|
||||
@ -6604,8 +6628,21 @@ for (;; ptr++)
|
||||
cd->had_accept = TRUE;
|
||||
for (oc = cd->open_caps; oc != NULL; oc = oc->next)
|
||||
{
|
||||
*code++ = OP_CLOSE;
|
||||
PUT2INC(code, 0, oc->number);
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
#ifdef COMPILE_PCRE8
|
||||
*lengthptr += 1 + IMM2_SIZE;
|
||||
#elif defined COMPILE_PCRE16
|
||||
*lengthptr += 2 + IMM2_SIZE;
|
||||
#elif defined COMPILE_PCRE32
|
||||
*lengthptr += 4 + IMM2_SIZE;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
*code++ = OP_CLOSE;
|
||||
PUT2INC(code, 0, oc->number);
|
||||
}
|
||||
}
|
||||
setverb = *code++ =
|
||||
(cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
|
||||
@ -6734,6 +6771,15 @@ for (;; ptr++)
|
||||
for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
|
||||
if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
|
||||
tempptr += i + 1;
|
||||
|
||||
/* tempptr should now be pointing to the opening parenthesis of the
|
||||
assertion condition. */
|
||||
|
||||
if (*tempptr != CHAR_LEFT_PARENTHESIS)
|
||||
{
|
||||
*errorcodeptr = ERR28;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
/* For conditions that are assertions, check the syntax, and then exit
|
||||
@ -7258,7 +7304,7 @@ for (;; ptr++)
|
||||
issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
|
||||
only mode, we finesse the bug by allowing more memory always. */
|
||||
|
||||
*lengthptr += 2 + 2*LINK_SIZE;
|
||||
*lengthptr += 4 + 4*LINK_SIZE;
|
||||
|
||||
/* It is even worse than that. The current reference may be to an
|
||||
existing named group with a different number (so apparently not
|
||||
@ -7274,7 +7320,12 @@ for (;; ptr++)
|
||||
so far in order to get the number. If the name is not found, leave
|
||||
the value of recno as 0 for a forward reference. */
|
||||
|
||||
else
|
||||
/* This patch (removing "else") fixes a problem when a reference is
|
||||
to multiple identically named nested groups from within the nest.
|
||||
Once again, it is not the "proper" fix, and it results in an
|
||||
over-allocation of memory. */
|
||||
|
||||
/* else */
|
||||
{
|
||||
ng = cd->named_groups;
|
||||
for (i = 0; i < cd->names_found; i++, ng++)
|
||||
@ -7585,39 +7636,15 @@ for (;; ptr++)
|
||||
newoptions = (options | set) & (~unset);
|
||||
|
||||
/* If the options ended with ')' this is not the start of a nested
|
||||
group with option changes, so the options change at this level. If this
|
||||
item is right at the start of the pattern, the options can be
|
||||
abstracted and made external in the pre-compile phase, and ignored in
|
||||
the compile phase. This can be helpful when matching -- for instance in
|
||||
caseless checking of required bytes.
|
||||
|
||||
If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
|
||||
definitely *not* at the start of the pattern because something has been
|
||||
compiled. In the pre-compile phase, however, the code pointer can have
|
||||
that value after the start, because it gets reset as code is discarded
|
||||
during the pre-compile. However, this can happen only at top level - if
|
||||
we are within parentheses, the starting BRA will still be present. At
|
||||
any parenthesis level, the length value can be used to test if anything
|
||||
has been compiled at that level. Thus, a test for both these conditions
|
||||
is necessary to ensure we correctly detect the start of the pattern in
|
||||
both phases.
|
||||
|
||||
group with option changes, so the options change at this level.
|
||||
If we are not at the pattern start, reset the greedy defaults and the
|
||||
case value for firstchar and reqchar. */
|
||||
|
||||
if (*ptr == CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
if (code == cd->start_code + 1 + LINK_SIZE &&
|
||||
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
|
||||
{
|
||||
cd->external_options = newoptions;
|
||||
}
|
||||
else
|
||||
{
|
||||
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
|
||||
greedy_non_default = greedy_default ^ 1;
|
||||
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
|
||||
}
|
||||
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
|
||||
greedy_non_default = greedy_default ^ 1;
|
||||
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
|
||||
|
||||
/* Change options at this level, and pass them back for use
|
||||
in subsequent branches. */
|
||||
@ -7896,16 +7923,6 @@ for (;; ptr++)
|
||||
c = ec;
|
||||
else
|
||||
{
|
||||
if (escape == ESC_Q) /* Handle start of quoted string */
|
||||
{
|
||||
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
|
||||
ptr += 2; /* avoid empty string */
|
||||
else inescq = TRUE;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (escape == ESC_E) continue; /* Perl ignores an orphan \E */
|
||||
|
||||
/* For metasequences that actually match a character, we disable the
|
||||
setting of a first character if it hasn't already been set. */
|
||||
|
||||
|
Reference in New Issue
Block a user