mirror of
https://github.com/apache/httpd.git
synced 2025-07-29 09:01:18 +03:00
Introduce ap_rxplus class: higher-level regexps supporting perl-style
regexp operations. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@999533 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
4
CHANGES
4
CHANGES
@ -64,6 +64,10 @@ Changes with Apache 2.3.9
|
|||||||
and sub-directories of matched directories are no longer implicitly
|
and sub-directories of matched directories are no longer implicitly
|
||||||
matched. PR49809 [Eric Covener]
|
matched. PR49809 [Eric Covener]
|
||||||
|
|
||||||
|
*) Regexps: introduce new higher-level regexp utility including parsing
|
||||||
|
and executing perl-style regexp ops (e.g. s/foo/bar/i) and regexp memory
|
||||||
|
[Nick Kew]
|
||||||
|
|
||||||
Changes with Apache 2.3.8
|
Changes with Apache 2.3.8
|
||||||
|
|
||||||
*) suexec: Support large log files. PR 45856. [Stefan Fritsch]
|
*) suexec: Support large log files. PR 45856. [Stefan Fritsch]
|
||||||
|
@ -63,7 +63,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Options for ap_regexec: */
|
/* Options for ap_regcomp, ap_regexec, and ap_rxplus versions: */
|
||||||
|
|
||||||
#define AP_REG_ICASE 0x01 /** use a case-insensitive match */
|
#define AP_REG_ICASE 0x01 /** use a case-insensitive match */
|
||||||
#define AP_REG_NEWLINE 0x02 /** don't match newlines against '.' etc */
|
#define AP_REG_NEWLINE 0x02 /** don't match newlines against '.' etc */
|
||||||
@ -73,6 +73,10 @@ extern "C" {
|
|||||||
#define AP_REG_EXTENDED (0) /** unused */
|
#define AP_REG_EXTENDED (0) /** unused */
|
||||||
#define AP_REG_NOSUB (0) /** unused */
|
#define AP_REG_NOSUB (0) /** unused */
|
||||||
|
|
||||||
|
#define AP_REG_MULTI 0x10 /* perl's /g (needs fixing) */
|
||||||
|
#define AP_REG_NOMEM 0x20 /* nomem in our code */
|
||||||
|
#define AP_REG_DOTALL 0x40 /* perl's /s flag */
|
||||||
|
|
||||||
/* Error values: */
|
/* Error values: */
|
||||||
enum {
|
enum {
|
||||||
AP_REG_ASSERT = 1, /** internal error ? */
|
AP_REG_ASSERT = 1, /** internal error ? */
|
||||||
@ -134,6 +138,80 @@ AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg,
|
|||||||
*/
|
*/
|
||||||
AP_DECLARE(void) ap_regfree(ap_regex_t *preg);
|
AP_DECLARE(void) ap_regfree(ap_regex_t *preg);
|
||||||
|
|
||||||
|
/* ap_rxplus: higher-level regexps */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
ap_regex_t rx;
|
||||||
|
apr_uint32_t flags;
|
||||||
|
const char *subs;
|
||||||
|
const char *match;
|
||||||
|
apr_size_t nmatch;
|
||||||
|
ap_regmatch_t *pmatch;
|
||||||
|
} ap_rxplus_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compile a pattern into a regexp.
|
||||||
|
* supports perl-like formats
|
||||||
|
* match-string
|
||||||
|
* /match-string/flags
|
||||||
|
* s/match-string/replacement-string/flags
|
||||||
|
* Intended to support more perl-like stuff as and when round tuits happen
|
||||||
|
* match-string is anything supported by ap_regcomp
|
||||||
|
* replacement-string is a substitution string as supported in ap_pregsub
|
||||||
|
* flags should correspond with perl syntax: treat failure to do so as a bug
|
||||||
|
* (documentation TBD)
|
||||||
|
* @param pool Pool to allocate from
|
||||||
|
* @param pattern Pattern to compile
|
||||||
|
* @return Compiled regexp, or NULL in case of compile/syntax error
|
||||||
|
*/
|
||||||
|
AP_DECLARE(ap_rxplus_t*) ap_rxplus_compile(apr_pool_t *pool, const char *pattern);
|
||||||
|
/**
|
||||||
|
* Apply a regexp operation to a string.
|
||||||
|
* @param pool Pool to allocate from
|
||||||
|
* @param rx The regex match to apply
|
||||||
|
* @param pattern The string to apply it to
|
||||||
|
* NOTE: This MUST be kept in scope to use regexp memory
|
||||||
|
* @param newpattern The modified string (ignored if the operation doesn't
|
||||||
|
* modify the string)
|
||||||
|
* @return Number of times a match happens. Normally 0 (no match) or 1
|
||||||
|
* (match found), but may be greater if a transforming pattern
|
||||||
|
* is applied with the 'g' flag.
|
||||||
|
*/
|
||||||
|
AP_DECLARE(int) ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx,
|
||||||
|
const char *pattern, char **newpattern);
|
||||||
|
#ifdef DOXYGEN
|
||||||
|
/**
|
||||||
|
* Number of matches in the regexp operation's memory
|
||||||
|
* This may be 0 if no match is in memory, or up to nmatch from compilation
|
||||||
|
* @param rx The regexp
|
||||||
|
* @return Number of matches in memory
|
||||||
|
*/
|
||||||
|
AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx);
|
||||||
|
#else
|
||||||
|
#define ap_rxplus_nmatch(rx) (((rx)->match != NULL) ? (rx)->nmatch : 0)
|
||||||
|
#endif
|
||||||
|
/**
|
||||||
|
* Get a pointer to a match from regex memory
|
||||||
|
* NOTE: this relies on the match pattern from the last call to
|
||||||
|
* ap_rxplus_exec still being valid (i.e. not freed or out-of-scope)
|
||||||
|
* @param rx The regexp
|
||||||
|
* @param n The match number to retrieve (0 <= n < nmatch; 0 is the whole match)
|
||||||
|
* @param len Returns the length of the match.
|
||||||
|
* @param match Returns the match pattern
|
||||||
|
*/
|
||||||
|
AP_DECLARE(void) ap_rxplus_match(ap_rxplus_t *rx, int n, int *len,
|
||||||
|
const char **match);
|
||||||
|
/**
|
||||||
|
* Get a match from regex memory in a string copy
|
||||||
|
* NOTE: this relies on the match pattern from the last call to
|
||||||
|
* ap_rxplus_exec still being valid (i.e. not freed or out-of-scope)
|
||||||
|
* @param pool Pool to allocate from
|
||||||
|
* @param rx The regexp
|
||||||
|
* @param n The match number to retrieve (0 <= n < nmatch; 0 is the whole match)
|
||||||
|
* @return The matched string
|
||||||
|
*/
|
||||||
|
AP_DECLARE(char*) ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
@ -12,7 +12,7 @@ LTLIBRARY_SOURCES = \
|
|||||||
util_script.c util_md5.c util_cfgtree.c util_ebcdic.c util_time.c \
|
util_script.c util_md5.c util_cfgtree.c util_ebcdic.c util_time.c \
|
||||||
connection.c listen.c util_mutex.c mpm_common.c mpm_unix.c \
|
connection.c listen.c util_mutex.c mpm_common.c mpm_unix.c \
|
||||||
util_charset.c util_cookies.c util_debug.c util_xml.c \
|
util_charset.c util_cookies.c util_debug.c util_xml.c \
|
||||||
util_expr.c util_filter.c util_pcre.c exports.c \
|
util_expr.c util_filter.c util_pcre.c util_regex.c exports.c \
|
||||||
scoreboard.c error_bucket.c protocol.c core.c request.c provider.c \
|
scoreboard.c error_bucket.c protocol.c core.c request.c provider.c \
|
||||||
eoc_bucket.c eor_bucket.c core_filters.c
|
eoc_bucket.c eor_bucket.c core_filters.c
|
||||||
|
|
||||||
|
@ -128,6 +128,7 @@ int options = 0;
|
|||||||
|
|
||||||
if ((cflags & AP_REG_ICASE) != 0) options |= PCRE_CASELESS;
|
if ((cflags & AP_REG_ICASE) != 0) options |= PCRE_CASELESS;
|
||||||
if ((cflags & AP_REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
|
if ((cflags & AP_REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
|
||||||
|
if ((cflags & AP_REG_DOTALL) != 0) options |= PCRE_DOTALL;
|
||||||
|
|
||||||
preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
|
preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
|
||||||
preg->re_erroffset = erroffset;
|
preg->re_erroffset = erroffset;
|
||||||
|
261
server/util_regex.c
Normal file
261
server/util_regex.c
Normal file
@ -0,0 +1,261 @@
|
|||||||
|
/* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "apr.h"
|
||||||
|
#include "apr_lib.h"
|
||||||
|
#include "apr_pools.h"
|
||||||
|
#include "apr_strings.h"
|
||||||
|
#include "ap_config.h"
|
||||||
|
#include "ap_regex.h"
|
||||||
|
#include "httpd.h"
|
||||||
|
|
||||||
|
AP_DECLARE(ap_rxplus_t*) ap_rxplus_compile(apr_pool_t *pool,
|
||||||
|
const char *pattern)
|
||||||
|
{
|
||||||
|
/* perl style patterns
|
||||||
|
* add support for more as and when wanted
|
||||||
|
* substitute: s/rx/subs/
|
||||||
|
* match: m/rx/ or just /rx/
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* allow any nonalnum delimiter as first or second char.
|
||||||
|
* If we ever use this with non-string pattern we'll need an extra check
|
||||||
|
*/
|
||||||
|
const char *endp = 0;
|
||||||
|
const char *str = pattern;
|
||||||
|
const char *rxstr;
|
||||||
|
ap_rxplus_t *ret = apr_pcalloc(pool, sizeof(ap_rxplus_t));
|
||||||
|
char delim = 0;
|
||||||
|
enum { SUBSTITUTE = 's', MATCH = 'm'} action = MATCH;
|
||||||
|
if (!apr_isalnum(pattern[0])) {
|
||||||
|
delim = *str++;
|
||||||
|
}
|
||||||
|
else if (pattern[0] == 's' && !apr_isalnum(pattern[1])) {
|
||||||
|
action = SUBSTITUTE;
|
||||||
|
delim = pattern[1];
|
||||||
|
str += 2;
|
||||||
|
}
|
||||||
|
else if (pattern[0] == 'm' && !apr_isalnum(pattern[1])) {
|
||||||
|
delim = pattern[1];
|
||||||
|
str += 2;
|
||||||
|
}
|
||||||
|
/* TODO: support perl's after/before */
|
||||||
|
/* FIXME: fix these simplminded delims */
|
||||||
|
|
||||||
|
/* we think there's a delimiter. Allow for it not to be if unmatched */
|
||||||
|
if (delim) {
|
||||||
|
endp = ap_strchr_c(str, delim);
|
||||||
|
}
|
||||||
|
if (!endp) { /* there's no delim or flags */
|
||||||
|
if (ap_regcomp(&ret->rx, pattern, 0) == 0) {
|
||||||
|
apr_pool_cleanup_register(pool, &ret->rx, (void*) ap_regfree,
|
||||||
|
apr_pool_cleanup_null);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We have a delimiter. Use it to extract the regexp */
|
||||||
|
rxstr = apr_pstrndup(pool, str, endp-str);
|
||||||
|
|
||||||
|
/* If it's a substitution, we need the replacement string
|
||||||
|
* TODO: possible future enhancement - support other parsing
|
||||||
|
* in the replacement string.
|
||||||
|
*/
|
||||||
|
if (action == SUBSTITUTE) {
|
||||||
|
str = endp+1;
|
||||||
|
if (!*str || (endp = ap_strchr_c(str, delim), !endp)) {
|
||||||
|
/* missing replacement string is an error */
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
ret->subs = apr_pstrndup(pool, str, (endp-str));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* anything after the current delimiter is flags */
|
||||||
|
while (*++endp) {
|
||||||
|
switch (*endp) {
|
||||||
|
case 'i': ret->flags |= AP_REG_ICASE; break;
|
||||||
|
case 'm': ret->flags |= AP_REG_NEWLINE; break;
|
||||||
|
case 'n': ret->flags |= AP_REG_NOMEM; break;
|
||||||
|
case 'g': ret->flags |= AP_REG_MULTI; break;
|
||||||
|
case 's': ret->flags |= AP_REG_DOTALL; break;
|
||||||
|
case '^': ret->flags |= AP_REG_NOTBOL; break;
|
||||||
|
case '$': ret->flags |= AP_REG_NOTEOL; break;
|
||||||
|
default: break; /* we should probably be stricter here */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ap_regcomp(&ret->rx, rxstr, ret->flags) == 0) {
|
||||||
|
apr_pool_cleanup_register(pool, &ret->rx, (void*) ap_regfree,
|
||||||
|
apr_pool_cleanup_null);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (!(ret->flags & AP_REG_NOMEM)) {
|
||||||
|
/* count size of memory required, starting at 1 for the whole-match
|
||||||
|
* Simpleminded should be fine 'cos regcomp already checked syntax
|
||||||
|
*/
|
||||||
|
ret->nmatch = 1;
|
||||||
|
while (*rxstr) {
|
||||||
|
switch (*rxstr++) {
|
||||||
|
case '\\': /* next char is escaped - skip it */
|
||||||
|
if (*rxstr != 0) {
|
||||||
|
++rxstr;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case '(': /* unescaped bracket implies memory */
|
||||||
|
++ret->nmatch;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret->pmatch = apr_palloc(pool, ret->nmatch*sizeof(ap_regmatch_t));
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
AP_DECLARE(int) ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx,
|
||||||
|
const char *pattern, char **newpattern)
|
||||||
|
//int max_iterations)
|
||||||
|
{
|
||||||
|
#if 1
|
||||||
|
int ret = 1;
|
||||||
|
int startl, oldl, newl, diffsz;
|
||||||
|
const char *remainder;
|
||||||
|
char *subs;
|
||||||
|
/* snrf process_regexp from mod_headers */
|
||||||
|
if (ap_regexec(&rx->rx, pattern, rx->nmatch, rx->pmatch, rx->flags) != 0) {
|
||||||
|
rx->match = NULL;
|
||||||
|
return 0; /* no match, nothing to do */
|
||||||
|
}
|
||||||
|
rx->match = pattern;
|
||||||
|
if (rx->subs) {
|
||||||
|
*newpattern = ap_pregsub(pool, rx->subs, pattern,
|
||||||
|
rx->nmatch, rx->pmatch);
|
||||||
|
if (!*newpattern) {
|
||||||
|
return 0; /* FIXME - should we do more to handle error? */
|
||||||
|
}
|
||||||
|
startl = rx->pmatch[0].rm_so;
|
||||||
|
oldl = rx->pmatch[0].rm_eo - startl;
|
||||||
|
newl = strlen(*newpattern);
|
||||||
|
diffsz = newl - oldl;
|
||||||
|
remainder = pattern + startl + oldl;
|
||||||
|
if (rx->flags & AP_REG_MULTI) {
|
||||||
|
/* recurse to do any further matches */
|
||||||
|
char *subs;
|
||||||
|
ret += ap_rxplus_exec(pool, rx, remainder, &subs);
|
||||||
|
if (ret > 1) {
|
||||||
|
/* a further substitution happened */
|
||||||
|
diffsz += strlen(subs) - strlen(remainder);
|
||||||
|
remainder = subs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
subs = apr_palloc(pool, strlen(pattern) + 1 + diffsz);
|
||||||
|
memcpy(subs, pattern, startl);
|
||||||
|
memcpy(subs+startl, *newpattern, newl);
|
||||||
|
strcpy(subs+startl+newl, remainder);
|
||||||
|
*newpattern = subs;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if (!(rx->flags & AP_REG_MULTI) || (rx->subs == NULL)) {
|
||||||
|
max_iterations = 1;
|
||||||
|
}
|
||||||
|
/* FIXME: multi-matching is incorrect */
|
||||||
|
while (max_iterations-- > 0) {
|
||||||
|
if (ap_regexec(&rx->rx, pattern, rx->nmatch, rx->pmatch, rx->flags)
|
||||||
|
== 0) {
|
||||||
|
ret++;
|
||||||
|
if (rx->subs) {
|
||||||
|
rx->match = pattern;
|
||||||
|
*newpattern = ap_pregsub(pool, rx->subs, pattern,
|
||||||
|
rx->nmatch, rx->pmatch);
|
||||||
|
pattern = *newpattern;
|
||||||
|
if (pattern == NULL) {
|
||||||
|
max_iterations = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
max_iterations = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret == 0 || rx->flags&AP_REG_NOMEM) {
|
||||||
|
rx->match = NULL; /* no match, so don't pretend to remember a match */
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
#if 0
|
||||||
|
/* FIXME - should we be 'safe' and take the performance hit,
|
||||||
|
* or just document thou-shalt-keep-pattern-in-scope?
|
||||||
|
*/
|
||||||
|
if (rx->match == inpattern) {
|
||||||
|
rx->match = apr_pstrdup(pool, inpattern);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#ifdef DOXYGEN
/* Out-of-line form of ap_rxplus_nmatch, compiled only when DOXYGEN is
 * defined; otherwise ap_regex.h supplies it as a macro.
 * Returns the number of match slots in memory, or 0 when no match is held.
 */
AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx)
{
    if (rx->match == NULL) {
        return 0;
    }
    return rx->nmatch;
}
#endif
|
||||||
|
|
||||||
|
/* If this blows up on you, see the notes in the header/apidoc
|
||||||
|
* rx->match is a pointer and it's your responsibility to ensure
|
||||||
|
* it hasn't gone out-of-scope since the last ap_rxplus_exec
|
||||||
|
*/
|
||||||
|
AP_DECLARE(void) ap_rxplus_match(ap_rxplus_t *rx, int n, int *len,
|
||||||
|
const char **match)
|
||||||
|
{
|
||||||
|
if (n >= 0 && n < ap_rxplus_nmatch(rx)) {
|
||||||
|
*match = rx->match + rx->pmatch[n].rm_so;
|
||||||
|
*len = rx->pmatch[n].rm_eo - rx->pmatch[n].rm_so;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*len = -1;
|
||||||
|
*match = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Return match number n from regexp memory as a NUL-terminated copy
 * allocated from pool, or NULL when ap_rxplus_match reports no such match.
 * The subject string of the last ap_rxplus_exec must still be valid.
 */
AP_DECLARE(char*) ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n)
{
    const char *start;
    int length;

    ap_rxplus_match(rx, n, &length, &start);
    if (start == NULL) {
        return NULL;
    }
    return apr_pstrndup(pool, start, length);
}
|
Reference in New Issue
Block a user