1
0
mirror of https://github.com/apache/httpd.git synced 2025-07-29 09:01:18 +03:00

ap_expr: open string expressions to the <word>.

Introduces the syntax "%{:<word>:}", modeled on the existing <var> syntax, which
can likewise be embedded anywhere in a string expression (the same reserved
character ':' gets reused in an unambiguous manner).

This allows the two types of expressions (boolean and string) to now share
fully the same language set, namely: strings, lists, vars, regexes, backrefs,
functions with multiple or complex arguments, and especially combinations
thereof.

Most of them were previously reserved for boolean expressions only, although
complex string constructions can benefit string expressions just as well. The
<word> construct allows that (the syntax "%{:<word>:}" can be thought of as a
temporary variable constructed within a string).

Since string expressions may now have to deal with lists (arrays), they also
need a way to produce/extract strings from list and vice versa. This can be
done with the new "join" and "split" operators, while the new substitution
regexes (like "s/<pattern>/<substitute>/<flags>") may be used to manipulate
strings in place. All this of course available for both string and boolean
expressions.

Tests and doc updates upcoming..



git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1810605 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yann Ylavic
2017-10-02 21:57:26 +00:00
parent 5e90ede647
commit 08ddf7ad0a
9 changed files with 2117 additions and 1238 deletions

View File

@ -76,6 +76,9 @@ extern "C" {
#define AP_REG_MULTI 0x10 /**< perl's /g (needs fixing) */ #define AP_REG_MULTI 0x10 /**< perl's /g (needs fixing) */
#define AP_REG_NOMEM 0x20 /**< nomem in our code */ #define AP_REG_NOMEM 0x20 /**< nomem in our code */
#define AP_REG_DOTALL 0x40 /**< perl's /s flag */ #define AP_REG_DOTALL 0x40 /**< perl's /s flag */
#define AP_REG_NOTEMPTY 0x080 /**< Empty match not valid */
#define AP_REG_ANCHORED 0x100 /**< Match at the first position */
#define AP_REG_MATCH "MATCH_" /**< suggested prefix for ap_regname */ #define AP_REG_MATCH "MATCH_" /**< suggested prefix for ap_regname */

View File

@ -27,6 +27,7 @@
#include "util_varbuf.h" #include "util_varbuf.h"
#include "util_expr_private.h" #include "util_expr_private.h"
#include "util_md5.h" #include "util_md5.h"
#include "util_varbuf.h"
#include "apr_lib.h" #include "apr_lib.h"
#include "apr_fnmatch.h" #include "apr_fnmatch.h"
@ -54,6 +55,8 @@ AP_IMPLEMENT_HOOK_RUN_FIRST(int, expr_lookup, (ap_expr_lookup_parms *parms),
#define LOG_MARK(info) __FILE__, __LINE__, (info)->module_index #define LOG_MARK(info) __FILE__, __LINE__, (info)->module_index
static int ap_expr_eval_cond(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node);
static const char *ap_expr_eval_string_func(ap_expr_eval_ctx_t *ctx, static const char *ap_expr_eval_string_func(ap_expr_eval_ctx_t *ctx,
const ap_expr_t *info, const ap_expr_t *info,
const ap_expr_t *args); const ap_expr_t *args);
@ -63,6 +66,19 @@ static const char *ap_expr_eval_var(ap_expr_eval_ctx_t *ctx,
ap_expr_var_func_t *func, ap_expr_var_func_t *func,
const void *data); const void *data);
/* Regex context attached (as node_arg2) to an op_Regex node. */
typedef struct {
    /* 'm' for a plain match, 's' for a substitution, 'S' for a split */
    int type;
    /* AP_REG_* bits gathered from the flags of the regex */
    int flags;
    /* Substitution expression, NULL for a plain match */
    const ap_expr_t *subst;
} ap_expr_regctx_t;
static const char *ap_expr_regexec(const char *subject,
const ap_expr_t *reg,
apr_array_header_t *list,
ap_expr_eval_ctx_t *ctx);
static apr_array_header_t *ap_expr_list_make(ap_expr_eval_ctx_t *ctx,
const ap_expr_t *node);
/* define AP_EXPR_DEBUG to log the parse tree when parsing an expression */ /* define AP_EXPR_DEBUG to log the parse tree when parsing an expression */
#ifdef AP_EXPR_DEBUG #ifdef AP_EXPR_DEBUG
static void expr_dump_tree(const ap_expr_t *e, const server_rec *s, static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
@ -71,7 +87,7 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
/* /*
* To reduce counting overhead, we only count calls to * To reduce counting overhead, we only count calls to
* ap_expr_eval_word() and ap_expr_eval(). The max number of * ap_expr_eval_word() and ap_expr_eval_cond(). The max number of
* stack frames is larger by some factor. * stack frames is larger by some factor.
*/ */
#define AP_EXPR_MAX_RECURSION 20 #define AP_EXPR_MAX_RECURSION 20
@ -87,6 +103,37 @@ static int inc_rec(ap_expr_eval_ctx_t *ctx)
return 1; return 1;
} }
/*
 * Concatenate the strings of a list, inserting a separator between each
 * pair of consecutive elements.
 *
 * p:    pool the resulting buffer is allocated from
 * list: array of (const char *) elements; may be NULL since list
 *       producers are allowed to return no list at all
 * sep:  separator string; NULL is handled like ""
 *
 * Returns NULL for a NULL or empty list, the element itself (not a copy)
 * for a single element list, and a newly built string otherwise.
 */
static const char *ap_expr_list_pstrcat(apr_pool_t *p,
                                        const apr_array_header_t *list,
                                        const char *sep)
{
    if (!list || list->nelts <= 0) {
        return NULL;
    }
    else if (list->nelts == 1) {
        return APR_ARRAY_IDX(list, 0, const char*);
    }
    else {
        struct ap_varbuf vb;
        int n = list->nelts - 1, i;
        apr_size_t slen, vlen;
        const char *val;

        if (!sep) {
            sep = "";
        }
        slen = strlen(sep);

        ap_varbuf_init(p, &vb, 0);
        /* All the elements but the last one are followed by the separator */
        for (i = 0; i < n; ++i) {
            val = APR_ARRAY_IDX(list, i, const char*);
            vlen = strlen(val);
            /* Make room for both parts at once */
            ap_varbuf_grow(&vb, vlen + slen + 1);
            ap_varbuf_strmemcat(&vb, val, vlen);
            ap_varbuf_strmemcat(&vb, sep, slen);
        }
        val = APR_ARRAY_IDX(list, n, const char*);
        ap_varbuf_strmemcat(&vb, val, strlen(val));

        return vb.buf;
    }
}
static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx, static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx,
const ap_expr_t *node) const ap_expr_t *node)
{ {
@ -98,6 +145,12 @@ static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx,
case op_String: case op_String:
result = node->node_arg1; result = node->node_arg1;
break; break;
case op_Word:
result = ap_expr_eval_word(ctx, node->node_arg1);
break;
case op_Bool:
result = ap_expr_eval_cond(ctx, node->node_arg1) ? "true" : "false";
break;
case op_Var: case op_Var:
result = ap_expr_eval_var(ctx, (ap_expr_var_func_t *)node->node_arg1, result = ap_expr_eval_var(ctx, (ap_expr_var_func_t *)node->node_arg1,
node->node_arg2); node->node_arg2);
@ -168,7 +221,20 @@ static const char *ap_expr_eval_word(ap_expr_eval_ctx_t *ctx,
result = ap_expr_eval_string_func(ctx, info, args); result = ap_expr_eval_string_func(ctx, info, args);
break; break;
} }
case op_RegexBackref: { case op_Join: {
const char *sep;
apr_array_header_t *list = ap_expr_list_make(ctx, node->node_arg1);
sep = node->node_arg2 ? ap_expr_eval_word(ctx, node->node_arg2) : "";
result = ap_expr_list_pstrcat(ctx->p, list, sep);
break;
}
case op_Regsub: {
const ap_expr_t *reg = node->node_arg2;
const char *subject = ap_expr_eval_word(ctx, node->node_arg1);
result = ap_expr_regexec(subject, reg, NULL, ctx);
break;
}
case op_Regref: {
const unsigned int *np = node->node_arg1; const unsigned int *np = node->node_arg1;
result = ap_expr_eval_re_backref(ctx, *np); result = ap_expr_eval_re_backref(ctx, *np);
break; break;
@ -219,15 +285,7 @@ static const char *ap_expr_eval_string_func(ap_expr_eval_ctx_t *ctx,
if (arg->node_op == op_ListElement) { if (arg->node_op == op_ListElement) {
/* Evaluate the list elements and store them in apr_array_header. */ /* Evaluate the list elements and store them in apr_array_header. */
ap_expr_string_list_func_t *func = (ap_expr_string_list_func_t *)info->node_arg1; ap_expr_string_list_func_t *func = (ap_expr_string_list_func_t *)info->node_arg1;
apr_array_header_t *args = apr_array_make(ctx->p, 2, sizeof(char *)); apr_array_header_t *args = ap_expr_list_make(ctx, arg->node_arg1);
do {
const ap_expr_t *val = arg->node_arg1;
const char **new = apr_array_push(args);
*new = ap_expr_eval_word(ctx, val);
arg = arg->node_arg2;
} while (arg != NULL);
return (*func)(ctx, data, args); return (*func)(ctx, data, args);
} }
else { else {
@ -249,6 +307,170 @@ static int intstrcmp(const char *s1, const char *s2)
return 1; return 1;
} }
/*
 * Run the regex node `reg` against `subject`.
 *
 * The behaviour depends on the type recorded in the regex context:
 *   'm' (match):  returns `subject` if the regex matches, NULL otherwise.
 *   's' (subst):  returns `subject` with the first match (or every match when
 *                 AP_REG_MULTI is set) replaced by the evaluated substitution.
 *                 If nothing matches, `subject` is returned untouched.
 *   'S' (split):  when `list` is given, pushes one (const char *) element per
 *                 piece found around the matches, each piece being followed
 *                 by the evaluated substitution; the return value is then
 *                 meaningless. Without `list` this acts like 's'.
 *
 * subject: string to match against
 * reg:     op_Regex node (node_arg1: ap_regex_t, node_arg2: ap_expr_regctx_t)
 * list:    array receiving the split pieces, or NULL
 * ctx:     evaluation context (pool, optional regex match context)
 */
static const char *ap_expr_regexec(const char *subject,
                                   const ap_expr_t *reg,
                                   apr_array_header_t *list,
                                   ap_expr_eval_ctx_t *ctx)
{
    struct ap_varbuf vb;
    const char *val = subject;   /* where the next match is attempted */
    const char *start = subject; /* first input byte not emitted yet */
    const ap_regex_t *regex = reg->node_arg1;
    const ap_expr_regctx_t *regctx = reg->node_arg2;
    ap_regmatch_t *pmatch = NULL, match0;
    apr_size_t nmatch = 0;
    const char *str = "";
    apr_size_t len = 0;
    int empty = 0, rv;

    ap_varbuf_init(ctx->p, &vb, 0);
    if (ctx->re_nmatch > 0) {
        /* Use the caller's match vector so that backrefs get updated */
        nmatch = ctx->re_nmatch;
        pmatch = ctx->re_pmatch;
    }
    else if (regctx->type != 'm') {
        /* Substitution and split need at least the bounds of $0 */
        nmatch = 1;
        pmatch = &match0;
    }

    do {
        /* If previous match was empty, we can't issue the exact same one or
         * we'd loop indefinitely. So let's instead ask for an anchored and
         * non-empty match (i.e. something not empty at the start of the value)
         * and if nothing is found advance by one character below.
         */
        rv = ap_regexec(regex, val, nmatch, pmatch,
                        empty ? AP_REG_ANCHORED | AP_REG_NOTEMPTY : 0);
        if (regctx->type == 'm') {
            /* Simple match "m//", just return whether it matched (subject)
             * or not (NULL)
             */
            return (rv == 0) ? subject : NULL;
        }
        if (rv == 0) {
            /* Substitution "s//" or split "S//" matched.
             * s// => replace $0 with evaluated regctx->subst
             * S// => split at $0 (keeping evaluated regctx->subst if any)
             */
            int pos = pmatch[0].rm_so,
                end = pmatch[0].rm_eo;
            apr_size_t head;
            AP_DEBUG_ASSERT(pos >= 0 && pos <= end);

            /* Bytes preceding the match which were not emitted yet: those
             * skipped after an empty match (start..val) plus those before
             * the match in the current value (val..val+pos).
             */
            head = (apr_size_t)(val - start) + (apr_size_t)pos;

            if (regctx->subst) {
                /* Backrefs in the substitution are relative to the value
                 * just matched; the regex context is optional though.
                 */
                if (ctx->re_source) {
                    *ctx->re_source = val;
                }
                str = ap_expr_eval_word(ctx, regctx->subst);
                len = strlen(str);
            }
            /* Splitting makes sense into a given list only, if NULL we fall
             * back into returning a s// string...
             */
            if (list) {
                char *tmp = apr_palloc(ctx->p, head + len + 1);
                memcpy(tmp, start, head);
                memcpy(tmp + head, str, len);
                tmp[head + len] = '\0';
                APR_ARRAY_PUSH(list, const char*) = tmp;
            }
            else { /* regctx->type == 's' */
                ap_varbuf_grow(&vb, head + len + 1);
                ap_varbuf_strmemcat(&vb, start, head);
                ap_varbuf_strmemcat(&vb, str, len);
                if (!(regctx->flags & AP_REG_MULTI)) {
                    /* Single substitution, preserve remaining data */
                    ap_varbuf_strmemcat(&vb, val + end, strlen(val) - end);
                    return vb.buf;
                }
            }
            /* Note an empty match */
            empty = (end == 0);
            val += end;
            start = val;
        }
        else if (empty) {
            /* Step over this non-matching character (or CRLF) and restart
             * another "normal" match (possibly empty) from there. Only the
             * match position moves: `start` stays behind so that the bytes
             * stepped over are emitted with the next piece rather than lost.
             */
            if (val[0] == APR_ASCII_CR && val[1] == APR_ASCII_LF) {
                val += 2;
            }
            else {
                val++;
            }
            empty = 0;
        }
        else {
            /* No (more) match, what remains is taken as is */
            if (list) {
                APR_ARRAY_PUSH(list, const char*) = start;
            }
            else if (vb.avail) {
                ap_varbuf_strmemcat(&vb, start, strlen(start));
            }
            else {
                /* Nothing ever matched, no need to copy */
                return val;
            }
            return vb.buf;
        }
    } while (*val);

    /* End of input reached right after stepping over some character(s),
     * flush them.
     */
    if (start < val) {
        if (list) {
            APR_ARRAY_PUSH(list, const char*) = start;
        }
        else {
            ap_varbuf_strmemcat(&vb, start, (apr_size_t)(val - start));
        }
    }

    return vb.buf;
}
/*
 * Evaluate a list expression into an array of (const char *).
 *
 * The node may be:
 *   op_ListRegex:    a (nested) list or word filtered, substituted or split
 *                    by a regex (node_arg1: source, node_arg2: op_Regex)
 *   op_ListElement:  a chain of words linked through node_arg2
 *   op_ListFuncCall: a list function call, whose result is returned as is
 *                    (hence possibly NULL)
 *   anything else:   a word, evaluated into a list of that single string
 *
 * Returns the array allocated from ctx->p, or whatever a list function
 * returned (callers must be prepared for NULL).
 */
static apr_array_header_t *ap_expr_list_make(ap_expr_eval_ctx_t *ctx,
                                             const ap_expr_t *node)
{
    apr_array_header_t *list = NULL;

    if (node->node_op == op_ListRegex) {
        const ap_expr_t *arg = node->node_arg1;
        const ap_expr_t *reg = node->node_arg2;
        const ap_expr_regctx_t *regctx = reg->node_arg2;
        const apr_array_header_t *source = ap_expr_list_make(ctx, arg);
        /* A list function may give no list at all, same as an empty one */
        int nelts = source ? source->nelts : 0;
        int i;

        list = apr_array_make(ctx->p, nelts, sizeof(const char*));
        for (i = 0; i < nelts; ++i) {
            const char *val = APR_ARRAY_IDX(source, i, const char*);
            if (regctx->type == 'S') {
                /* Split: the pieces are pushed to the list directly */
                (void)ap_expr_regexec(val, reg, list, ctx);
            }
            else {
                /* Match or substitution: keep the result unless the element
                 * is filtered out (NULL)
                 */
                val = ap_expr_regexec(val, reg, NULL, ctx);
                if (val) {
                    APR_ARRAY_PUSH(list, const char*) = val;
                }
            }
        }
    }
    else if (node->node_op == op_ListElement) {
        int n = 0;
        const ap_expr_t *elem;

        /* Count the elements first to size the array appropriately */
        for (elem = node; elem; elem = elem->node_arg2) {
            AP_DEBUG_ASSERT(elem->node_op == op_ListElement);
            n++;
        }

        list = apr_array_make(ctx->p, n, sizeof(const char*));
        for (elem = node; elem; elem = elem->node_arg2) {
            APR_ARRAY_PUSH(list, const char*) =
                ap_expr_eval_word(ctx, elem->node_arg1);
        }
    }
    else if (node->node_op == op_ListFuncCall) {
        const ap_expr_t *info = node->node_arg1;
        ap_expr_list_func_t *func = info->node_arg1;

        AP_DEBUG_ASSERT(func != NULL);
        AP_DEBUG_ASSERT(info->node_op == op_ListFuncInfo);
        list = (*func)(ctx, info->node_arg2,
                       ap_expr_eval_word(ctx, node->node_arg2));
    }
    else {
        /* A plain word is the list of its own value. The regex (if any) is
         * applied by the op_ListRegex caller: this node's node_arg2 is not
         * a regex node and must not be handed to ap_expr_regexec().
         */
        const char *subject = ap_expr_eval_word(ctx, node);

        list = apr_array_make(ctx->p, 1, sizeof(const char*));
        APR_ARRAY_PUSH(list, const char*) = subject;
    }

    return list;
}
static int ap_expr_eval_comp(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node) static int ap_expr_eval_comp(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
{ {
const ap_expr_t *e1 = node->node_arg1; const ap_expr_t *e1 = node->node_arg1;
@ -279,30 +501,17 @@ static int ap_expr_eval_comp(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
case op_STR_GE: case op_STR_GE:
return (strcmp(ap_expr_eval_word(ctx, e1), ap_expr_eval_word(ctx, e2)) >= 0); return (strcmp(ap_expr_eval_word(ctx, e1), ap_expr_eval_word(ctx, e2)) >= 0);
case op_IN: { case op_IN: {
const char *needle = ap_expr_eval_word(ctx, e1); int n;
if (e2->node_op == op_ListElement) { const char *needle, *subject;
do { apr_array_header_t *haystack;
const ap_expr_t *val = e2->node_arg1; haystack = ap_expr_list_make(ctx, e2);
AP_DEBUG_ASSERT(e2->node_op == op_ListElement); if (haystack) {
if (strcmp(needle, ap_expr_eval_word(ctx, val)) == 0) needle = ap_expr_eval_word(ctx, e1);
for (n = 0; n < haystack->nelts; ++n) {
subject = APR_ARRAY_IDX(haystack, n, const char*);
if (strcmp(needle, subject) == 0) {
return 1; return 1;
e2 = e2->node_arg2; }
} while (e2 != NULL);
}
else if (e2->node_op == op_ListFuncCall) {
const ap_expr_t *info = e2->node_arg1;
const ap_expr_t *arg = e2->node_arg2;
ap_expr_list_func_t *func = (ap_expr_list_func_t *)info->node_arg1;
apr_array_header_t *haystack;
AP_DEBUG_ASSERT(func != NULL);
AP_DEBUG_ASSERT(info->node_op == op_ListFuncInfo);
haystack = (*func)(ctx, info->node_arg2, ap_expr_eval_word(ctx, arg));
if (haystack == NULL) {
return 0;
}
if (ap_array_str_contains(haystack, needle)) {
return 1;
} }
} }
return 0; return 0;
@ -326,10 +535,7 @@ static int ap_expr_eval_comp(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
result = (0 == ap_regexec(regex, word, 0, NULL, 0)); result = (0 == ap_regexec(regex, word, 0, NULL, 0));
} }
if (node->node_op == op_REG) return result ^ (node->node_op == op_NRE);
return result;
else
return !result;
} }
default: default:
*ctx->err = "Internal evaluation error: Unknown comp expression node"; *ctx->err = "Internal evaluation error: Unknown comp expression node";
@ -401,18 +607,13 @@ AP_DECLARE(const char *) ap_expr_parse(apr_pool_t *pool, apr_pool_t *ptemp,
ap_expr_parse_ctx_t ctx; ap_expr_parse_ctx_t ctx;
int rc; int rc;
memset(&ctx, 0, sizeof ctx);
ctx.pool = pool; ctx.pool = pool;
ctx.ptemp = ptemp; ctx.ptemp = ptemp;
ctx.inputbuf = expr; ctx.inputbuf = expr;
ctx.inputlen = strlen(expr); ctx.inputlen = strlen(expr);
ctx.inputptr = ctx.inputbuf; ctx.inputptr = ctx.inputbuf;
ctx.expr = NULL;
ctx.error = NULL; /* generic bison error message (XXX: usually not very useful, should be axed) */
ctx.error2 = NULL; /* additional error message */
ctx.flags = info->flags; ctx.flags = info->flags;
ctx.scan_del = '\0';
ctx.scan_buf[0] = '\0';
ctx.scan_ptr = ctx.scan_buf;
ctx.lookup_fn = lookup_fn ? lookup_fn : ap_expr_lookup_default; ctx.lookup_fn = lookup_fn ? lookup_fn : ap_expr_lookup_default;
ctx.at_start = 1; ctx.at_start = 1;
@ -420,6 +621,11 @@ AP_DECLARE(const char *) ap_expr_parse(apr_pool_t *pool, apr_pool_t *ptemp,
ap_expr_yyset_extra(&ctx, ctx.scanner); ap_expr_yyset_extra(&ctx, ctx.scanner);
rc = ap_expr_yyparse(&ctx); rc = ap_expr_yyparse(&ctx);
ap_expr_yylex_destroy(ctx.scanner); ap_expr_yylex_destroy(ctx.scanner);
/* ctx.error: the generic bison error message
* (XXX: usually not very useful, should be axed)
* ctx.error2: an additional error message
*/
if (ctx.error) { if (ctx.error) {
if (ctx.error2) if (ctx.error2)
return apr_psprintf(pool, "%s: %s", ctx.error, ctx.error2); return apr_psprintf(pool, "%s: %s", ctx.error, ctx.error2);
@ -464,7 +670,7 @@ AP_DECLARE(ap_expr_info_t*) ap_expr_parse_cmd_mi(const cmd_parms *cmd,
} }
ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *a1, const void *a2, ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *a1, const void *a2,
ap_expr_parse_ctx_t *ctx) ap_expr_parse_ctx_t *ctx)
{ {
ap_expr_t *node = apr_palloc(ctx->pool, sizeof(ap_expr_t)); ap_expr_t *node = apr_palloc(ctx->pool, sizeof(ap_expr_t));
node->node_op = op; node->node_op = op;
@ -473,6 +679,100 @@ ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *a1, const void *a2,
return node; return node;
} }
/*
 * Build the concatenation of two expressions, unless one of them is a
 * constant empty string in which case the other one is returned as is.
 */
ap_expr_t *ap_expr_concat_make(const void *a1, const void *a2,
                               ap_expr_parse_ctx_t *ctx)
{
    const ap_expr_t *lhs = a1;
    const ap_expr_t *rhs = a2;

    /* "" . x => x */
    if (lhs != NULL && lhs->node_op == op_String
            && *(const char *)lhs->node_arg1 == '\0') {
        return (ap_expr_t *)a2;
    }
    /* x . "" => x */
    if (rhs != NULL && rhs->node_op == op_String
            && *(const char *)rhs->node_arg1 == '\0') {
        return (ap_expr_t *)a1;
    }

    return ap_expr_make(op_Concat, a1, a2, ctx);
}
/*
 * Wrap a word expression in an op_Word node (the word is node_arg1, there
 * is no node_arg2), allocated from the parser's pool.
 */
ap_expr_t *ap_expr_str_word_make(const ap_expr_t *arg,
                                 ap_expr_parse_ctx_t *ctx)
{
    return ap_expr_make(op_Word, arg, NULL, ctx);
}
/*
 * Wrap a condition in an op_Bool node (the condition is node_arg1, there
 * is no node_arg2), allocated from the parser's pool.
 */
ap_expr_t *ap_expr_str_bool_make(const ap_expr_t *arg,
                                 ap_expr_parse_ctx_t *ctx)
{
    return ap_expr_make(op_Bool, arg, NULL, ctx);
}
/*
 * Compile a regex and build the corresponding op_Regex node.
 *
 * pattern: the regular expression to compile
 * flags:   string of flag characters, or NULL; 'i', 'm', 's' and 'g' map to
 *          AP_REG_ICASE, AP_REG_NEWLINE, AP_REG_DOTALL and AP_REG_MULTI
 *          respectively, any other character is ignored
 * subst:   substitution expression, NULL for a plain match
 * split:   with a substitution, whether this is a split (always global)
 *          rather than a substitution
 *
 * Returns the node (node_arg1: compiled regex, node_arg2: regex context),
 * or NULL if the pattern can't be compiled.
 */
ap_expr_t *ap_expr_regex_make(const char *pattern, const char *flags,
                              const ap_expr_t *subst, int split,
                              ap_expr_parse_ctx_t *ctx)
{
    ap_expr_regctx_t *regctx = apr_palloc(ctx->pool, sizeof *regctx);
    ap_regex_t *regex;
    ap_expr_t *node;
    const char *f;
    int bits = 0;

    for (f = flags; f != NULL && *f != '\0'; ++f) {
        if (*f == 'i') {
            bits |= AP_REG_ICASE;
        }
        else if (*f == 'm') {
            bits |= AP_REG_NEWLINE;
        }
        else if (*f == 's') {
            bits |= AP_REG_DOTALL;
        }
        else if (*f == 'g') {
            bits |= AP_REG_MULTI;
        }
    }

    /* The kind of regex follows from the substitution and split arguments */
    if (subst == NULL) {
        regctx->type = 'm';
    }
    else if (split) {
        regctx->type = 'S';
        bits |= AP_REG_MULTI;
    }
    else {
        regctx->type = 's';
    }
    regctx->subst = subst;
    regctx->flags = bits;

    regex = ap_pregcomp(ctx->pool, pattern, regctx->flags);
    if (regex == NULL) {
        return NULL;
    }

    node = apr_palloc(ctx->pool, sizeof *node);
    node->node_op = op_Regex;
    node->node_arg1 = regex;
    node->node_arg2 = regctx;
    return node;
}
static ap_expr_t *ap_expr_info_make(int type, const char *name, static ap_expr_t *ap_expr_info_make(int type, const char *name,
ap_expr_parse_ctx_t *ctx, ap_expr_parse_ctx_t *ctx,
const ap_expr_t *arg) const ap_expr_t *arg)
@ -533,6 +833,16 @@ ap_expr_t *ap_expr_list_func_make(const char *name, const ap_expr_t *arg,
return ap_expr_make(op_ListFuncCall, info, arg, ctx); return ap_expr_make(op_ListFuncCall, info, arg, ctx);
} }
/*
 * Build an op_ListRegex node applying the regex node `reg` (node_arg2) to
 * the list or word expression `arg` (node_arg1).
 */
ap_expr_t *ap_expr_list_regex_make(const ap_expr_t *arg, const ap_expr_t *reg,
                                   ap_expr_parse_ctx_t *ctx)
{
    return ap_expr_make(op_ListRegex, arg, reg, ctx);
}
ap_expr_t *ap_expr_unary_op_make(const char *name, const ap_expr_t *arg, ap_expr_t *ap_expr_unary_op_make(const char *name, const ap_expr_t *arg,
ap_expr_parse_ctx_t *ctx) ap_expr_parse_ctx_t *ctx)
{ {
@ -654,10 +964,15 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
case op_IN: case op_IN:
case op_REG: case op_REG:
case op_NRE: case op_NRE:
case op_Word:
case op_Bool:
case op_Join:
case op_Regsub:
case op_Concat: case op_Concat:
case op_StringFuncCall: case op_StringFuncCall:
case op_ListFuncCall: case op_ListFuncCall:
case op_ListElement: case op_ListElement:
case op_ListRegex:
{ {
char *name; char *name;
switch (e->node_op) { switch (e->node_op) {
@ -680,10 +995,15 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
CASE_OP(op_IN); CASE_OP(op_IN);
CASE_OP(op_REG); CASE_OP(op_REG);
CASE_OP(op_NRE); CASE_OP(op_NRE);
CASE_OP(op_Word);
CASE_OP(op_Bool);
CASE_OP(op_Join);
CASE_OP(op_Regsub);
CASE_OP(op_Concat); CASE_OP(op_Concat);
CASE_OP(op_StringFuncCall); CASE_OP(op_StringFuncCall);
CASE_OP(op_ListFuncCall); CASE_OP(op_ListFuncCall);
CASE_OP(op_ListElement); CASE_OP(op_ListElement);
CASE_OP(op_ListRegex);
default: default:
ap_assert(0); ap_assert(0);
} }
@ -729,8 +1049,8 @@ static void expr_dump_tree(const ap_expr_t *e, const server_rec *s,
DUMP_P("op_Regex", e->node_arg1); DUMP_P("op_Regex", e->node_arg1);
break; break;
/* arg1: pointer to int */ /* arg1: pointer to int */
case op_RegexBackref: case op_Regref:
DUMP_IP("op_RegexBackref", e->node_arg1); DUMP_IP("op_Regref", e->node_arg1);
break; break;
default: default:
ap_log_error(MARK, "%*sERROR: INVALID OP %d", indent, " ", e->node_op); ap_log_error(MARK, "%*sERROR: INVALID OP %d", indent, " ", e->node_op);
@ -769,7 +1089,7 @@ static int ap_expr_eval_binary_op(ap_expr_eval_ctx_t *ctx,
} }
static int ap_expr_eval(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node) static int ap_expr_eval_cond(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
{ {
const ap_expr_t *e1 = node->node_arg1; const ap_expr_t *e1 = node->node_arg1;
const ap_expr_t *e2 = node->node_arg2; const ap_expr_t *e2 = node->node_arg2;
@ -791,13 +1111,13 @@ static int ap_expr_eval(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
case op_Or: case op_Or:
do { do {
if (e1->node_op == op_Not) { if (e1->node_op == op_Not) {
if (!ap_expr_eval(ctx, e1->node_arg1)) { if (!ap_expr_eval_cond(ctx, e1->node_arg1)) {
result ^= TRUE; result ^= TRUE;
goto out; goto out;
} }
} }
else { else {
if (ap_expr_eval(ctx, e1)) { if (ap_expr_eval_cond(ctx, e1)) {
result ^= TRUE; result ^= TRUE;
goto out; goto out;
} }
@ -809,13 +1129,13 @@ static int ap_expr_eval(ap_expr_eval_ctx_t *ctx, const ap_expr_t *node)
case op_And: case op_And:
do { do {
if (e1->node_op == op_Not) { if (e1->node_op == op_Not) {
if (ap_expr_eval(ctx, e1->node_arg1)) { if (ap_expr_eval_cond(ctx, e1->node_arg1)) {
result ^= FALSE; result ^= FALSE;
goto out; goto out;
} }
} }
else { else {
if (!ap_expr_eval(ctx, e1)) { if (!ap_expr_eval_cond(ctx, e1)) {
result ^= FALSE; result ^= FALSE;
goto out; goto out;
} }
@ -889,7 +1209,7 @@ AP_DECLARE(int) ap_expr_exec_ctx(ap_expr_eval_ctx_t *ctx)
} }
} }
else { else {
rc = ap_expr_eval(ctx, ctx->info->root_node); rc = ap_expr_eval_cond(ctx, ctx->info->root_node);
if (*ctx->err != NULL) { if (*ctx->err != NULL) {
ap_log_rerror(LOG_MARK(ctx->info), APLOG_ERR, 0, ctx->r, ap_log_rerror(LOG_MARK(ctx->info), APLOG_ERR, 0, ctx->r,
APLOGNO(03299) APLOGNO(03299)

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* A Bison parser, made by GNU Bison 2.7.12-4996. */ /* A Bison parser, made by GNU Bison 2.7.1. */
/* Bison interface for Yacc-like parsers in C /* Bison interface for Yacc-like parsers in C
@ -55,33 +55,40 @@ extern int ap_expr_yydebug;
T_ID = 264, T_ID = 264,
T_STRING = 265, T_STRING = 265,
T_REGEX = 266, T_REGEX = 266,
T_REGEX_I = 267, T_REGSUB = 267,
T_REGEX_BACKREF = 268, T_REG_MATCH = 268,
T_OP_UNARY = 269, T_REG_SUBST = 269,
T_OP_BINARY = 270, T_REG_FLAGS = 270,
T_STR_BEGIN = 271, T_REG_REF = 271,
T_STR_END = 272, T_OP_UNARY = 272,
T_VAR_BEGIN = 273, T_OP_BINARY = 273,
T_VAR_END = 274, T_STR_BEGIN = 274,
T_OP_EQ = 275, T_STR_END = 275,
T_OP_NE = 276, T_VAR_BEGIN = 276,
T_OP_LT = 277, T_VAR_END = 277,
T_OP_LE = 278, T_VAREXP_BEGIN = 278,
T_OP_GT = 279, T_VAREXP_END = 279,
T_OP_GE = 280, T_OP_EQ = 280,
T_OP_REG = 281, T_OP_NE = 281,
T_OP_NRE = 282, T_OP_LT = 282,
T_OP_IN = 283, T_OP_LE = 283,
T_OP_STR_EQ = 284, T_OP_GT = 284,
T_OP_STR_NE = 285, T_OP_GE = 285,
T_OP_STR_LT = 286, T_OP_REG = 286,
T_OP_STR_LE = 287, T_OP_NRE = 287,
T_OP_STR_GT = 288, T_OP_IN = 288,
T_OP_STR_GE = 289, T_OP_STR_EQ = 289,
T_OP_CONCAT = 290, T_OP_STR_NE = 290,
T_OP_OR = 291, T_OP_STR_LT = 291,
T_OP_AND = 292, T_OP_STR_LE = 292,
T_OP_NOT = 293 T_OP_STR_GT = 293,
T_OP_STR_GE = 294,
T_OP_CONCAT = 295,
T_OP_SPLIT = 296,
T_OP_JOIN = 297,
T_OP_OR = 298,
T_OP_AND = 299,
T_OP_NOT = 300
}; };
#endif #endif
@ -98,7 +105,7 @@ typedef union YYSTYPE
/* Line 2053 of yacc.c */ /* Line 2053 of yacc.c */
#line 102 "util_expr_parse.h" #line 109 "util_expr_parse.h"
} YYSTYPE; } YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1 # define YYSTYPE_IS_TRIVIAL 1
# define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define yystype YYSTYPE /* obsolescent; will be withdrawn */

View File

@ -48,10 +48,15 @@
%token <cpVal> T_DIGIT "number" %token <cpVal> T_DIGIT "number"
%token <cpVal> T_ID "identifier" %token <cpVal> T_ID "identifier"
%token <cpVal> T_STRING "cstring" %token <cpVal> T_STRING "string"
%token <cpVal> T_REGEX "regex"
%token <cpVal> T_REGEX_I "case-indendent regex" %token T_REGEX "match regex"
%token <num> T_REGEX_BACKREF "regex back reference" %token T_REGSUB "substitution regex"
%token <cpVal> T_REG_MATCH "match pattern of the regex"
%token <cpVal> T_REG_SUBST "substitution pattern of the regex"
%token <cpVal> T_REG_FLAGS "flags of the regex"
%token <num> T_REG_REF "regex back reference"
%token <cpVal> T_OP_UNARY "unary operator" %token <cpVal> T_OP_UNARY "unary operator"
%token <cpVal> T_OP_BINARY "binary operator" %token <cpVal> T_OP_BINARY "binary operator"
@ -59,6 +64,8 @@
%token T_STR_END "end of string" %token T_STR_END "end of string"
%token T_VAR_BEGIN "start of variable name" %token T_VAR_BEGIN "start of variable name"
%token T_VAR_END "end of variable name" %token T_VAR_END "end of variable name"
%token T_VAREXP_BEGIN "start of variable expression"
%token T_VAREXP_END "end of variable expression"
%token T_OP_EQ "integer equal" %token T_OP_EQ "integer equal"
%token T_OP_NE "integer not equal" %token T_OP_NE "integer not equal"
@ -75,8 +82,12 @@
%token T_OP_STR_LE "string less or equal" %token T_OP_STR_LE "string less or equal"
%token T_OP_STR_GT "string greater than" %token T_OP_STR_GT "string greater than"
%token T_OP_STR_GE "string greater or equal" %token T_OP_STR_GE "string greater or equal"
%token T_OP_CONCAT "string concatenation" %token T_OP_CONCAT "string concatenation"
%token T_OP_SPLIT "split operator"
%token T_OP_JOIN "join operator"
%token T_OP_OR "logical or" %token T_OP_OR "logical or"
%token T_OP_AND "logical and" %token T_OP_AND "logical and"
%token T_OP_NOT "logical not" %token T_OP_NOT "logical not"
@ -86,18 +97,21 @@
%right T_OP_NOT %right T_OP_NOT
%right T_OP_CONCAT %right T_OP_CONCAT
%type <exVal> expr %type <exVal> cond "condition"
%type <exVal> comparison %type <exVal> comp "comparison"
%type <exVal> strfunccall "function" %type <exVal> strfunc "string function"
%type <exVal> lstfunccall "listfunction" %type <exVal> lstfunc "list function"
%type <exVal> regex %type <exVal> wordlist "list of words"
%type <exVal> words %type <exVal> words "tuple of words"
%type <exVal> wordlist %type <exVal> word "word expression"
%type <exVal> word %type <exVal> string "string expression"
%type <exVal> string %type <exVal> strany "any string expression"
%type <exVal> strpart "stringpart" %type <exVal> var "variable expression"
%type <exVal> var "variable" %type <exVal> regex "regular expression match"
%type <exVal> backref "rebackref" %type <exVal> regsub "regular expression substitution"
%type <exVal> regsplit "regular expression split"
%type <exVal> regany "any regular expression"
%type <exVal> regref "regular expression back reference"
%{ %{
#include "util_expr_private.h" #include "util_expr_private.h"
@ -109,24 +123,24 @@ int ap_expr_yylex(YYSTYPE *lvalp, void *scanner);
%% %%
root : T_EXPR_BOOL expr { ctx->expr = $2; } root : T_EXPR_BOOL cond { ctx->expr = $2; }
| T_EXPR_STRING string { ctx->expr = $2; } | T_EXPR_STRING string { ctx->expr = $2; }
| T_ERROR { YYABORT; } | T_ERROR { YYABORT; }
; ;
expr : T_TRUE { $$ = ap_expr_make(op_True, NULL, NULL, ctx); } cond : T_TRUE { $$ = ap_expr_make(op_True, NULL, NULL, ctx); }
| T_FALSE { $$ = ap_expr_make(op_False, NULL, NULL, ctx); } | T_FALSE { $$ = ap_expr_make(op_False, NULL, NULL, ctx); }
| T_OP_NOT expr { $$ = ap_expr_make(op_Not, $2, NULL, ctx); } | T_OP_NOT cond { $$ = ap_expr_make(op_Not, $2, NULL, ctx); }
| expr T_OP_OR expr { $$ = ap_expr_make(op_Or, $1, $3, ctx); } | cond T_OP_OR cond { $$ = ap_expr_make(op_Or, $1, $3, ctx); }
| expr T_OP_AND expr { $$ = ap_expr_make(op_And, $1, $3, ctx); } | cond T_OP_AND cond { $$ = ap_expr_make(op_And, $1, $3, ctx); }
| comparison { $$ = ap_expr_make(op_Comp, $1, NULL, ctx); } | comp { $$ = ap_expr_make(op_Comp, $1, NULL, ctx); }
| T_OP_UNARY word { $$ = ap_expr_unary_op_make( $1, $2, ctx); } | T_OP_UNARY word { $$ = ap_expr_unary_op_make( $1, $2, ctx); }
| word T_OP_BINARY word { $$ = ap_expr_binary_op_make($2, $1, $3, ctx); } | word T_OP_BINARY word { $$ = ap_expr_binary_op_make($2, $1, $3, ctx); }
| '(' expr ')' { $$ = $2; } | '(' cond ')' { $$ = $2; }
| T_ERROR { YYABORT; } | T_ERROR { YYABORT; }
; ;
comparison: word T_OP_EQ word { $$ = ap_expr_make(op_EQ, $1, $3, ctx); } comp : word T_OP_EQ word { $$ = ap_expr_make(op_EQ, $1, $3, ctx); }
| word T_OP_NE word { $$ = ap_expr_make(op_NE, $1, $3, ctx); } | word T_OP_NE word { $$ = ap_expr_make(op_NE, $1, $3, ctx); }
| word T_OP_LT word { $$ = ap_expr_make(op_LT, $1, $3, ctx); } | word T_OP_LT word { $$ = ap_expr_make(op_LT, $1, $3, ctx); }
| word T_OP_LE word { $$ = ap_expr_make(op_LE, $1, $3, ctx); } | word T_OP_LE word { $$ = ap_expr_make(op_LE, $1, $3, ctx); }
@ -143,70 +157,107 @@ comparison: word T_OP_EQ word { $$ = ap_expr_make(op_EQ, $1, $3,
| word T_OP_NRE regex { $$ = ap_expr_make(op_NRE, $1, $3, ctx); } | word T_OP_NRE regex { $$ = ap_expr_make(op_NRE, $1, $3, ctx); }
; ;
wordlist : lstfunccall { $$ = $1; } wordlist : lstfunc { $$ = $1; }
| word T_OP_REG regsplit { $$ = ap_expr_list_regex_make($1, $3, ctx); }
| wordlist T_OP_REG regany { $$ = ap_expr_list_regex_make($1, $3, ctx); }
| '{' words '}' { $$ = $2; } | '{' words '}' { $$ = $2; }
| '(' wordlist ')' { $$ = $2; }
; ;
words : word { $$ = ap_expr_make(op_ListElement, $1, NULL, ctx); } words : word { $$ = ap_expr_make(op_ListElement, $1, NULL, ctx); }
| words ',' word { $$ = ap_expr_make(op_ListElement, $3, $1, ctx); } | word ',' words { $$ = ap_expr_make(op_ListElement, $1, $3, ctx); }
; ;
string : string strpart { $$ = ap_expr_make(op_Concat, $1, $2, ctx); } string : strany { $$ = $1; }
| strpart { $$ = $1; } | string strany { $$ = ap_expr_concat_make($1, $2, ctx); }
| T_ERROR { YYABORT; } | T_ERROR { YYABORT; }
; ;
strpart : T_STRING { $$ = ap_expr_make(op_String, $1, NULL, ctx); } strany : T_STRING { $$ = ap_expr_make(op_String, $1, NULL, ctx); }
| var { $$ = $1; } | var { $$ = $1; }
| backref { $$ = $1; } | regref { $$ = $1; }
; ;
var : T_VAR_BEGIN T_ID T_VAR_END { $$ = ap_expr_var_make($2, ctx); } var : T_VAR_BEGIN T_ID T_VAR_END { $$ = ap_expr_var_make($2, ctx); }
| T_VAR_BEGIN T_ID ':' string T_VAR_END { $$ = ap_expr_str_func_make($2, $4, ctx); } | T_VAR_BEGIN T_ID ':' string T_VAR_END { $$ = ap_expr_str_func_make($2, $4, ctx); }
| T_VAREXP_BEGIN word T_VAREXP_END { $$ = ap_expr_str_word_make($2, ctx); }
| T_VAREXP_BEGIN cond T_VAREXP_END { $$ = ap_expr_str_bool_make($2, ctx); }
; ;
word : T_DIGIT { $$ = ap_expr_make(op_Digit, $1, NULL, ctx); } word : T_DIGIT { $$ = ap_expr_make(op_Digit, $1, NULL, ctx); }
| word T_OP_CONCAT word { $$ = ap_expr_make(op_Concat, $1, $3, ctx); }
| var { $$ = $1; }
| backref { $$ = $1; }
| strfunccall { $$ = $1; }
| T_STR_BEGIN string T_STR_END { $$ = $2; }
| T_STR_BEGIN T_STR_END { $$ = ap_expr_make(op_String, "", NULL, ctx); } | T_STR_BEGIN T_STR_END { $$ = ap_expr_make(op_String, "", NULL, ctx); }
| T_STR_BEGIN string T_STR_END { $$ = $2; }
| word T_OP_CONCAT word { $$ = ap_expr_make(op_Concat, $1, $3, ctx); }
| word T_OP_REG regsub { $$ = ap_expr_make(op_Regsub, $1, $3, ctx); }
| var { $$ = $1; }
| regref { $$ = $1; }
| strfunc { $$ = $1; }
| T_OP_JOIN wordlist {
$$ = ap_expr_make(op_Join, $2, NULL, ctx);
}
| T_OP_JOIN wordlist ',' word {
$$ = ap_expr_make(op_Join, $2, $4, ctx);
}
| T_OP_JOIN '(' wordlist ',' word ')' {
$$ = ap_expr_make(op_Join, $3, $5, ctx);
}
| '(' word ')' { $$ = $2; }
; ;
regex : T_REGEX { regex : T_REGEX T_REG_MATCH T_REG_FLAGS {
ap_regex_t *regex; ap_expr_t *e = ap_expr_regex_make($2, $3, NULL, 0, ctx);
if ((regex = ap_pregcomp(ctx->pool, $1, if (!e) {
AP_REG_EXTENDED|AP_REG_NOSUB)) == NULL) {
ctx->error = "Failed to compile regular expression"; ctx->error = "Failed to compile regular expression";
YYERROR; YYERROR;
} }
$$ = ap_expr_make(op_Regex, regex, NULL, ctx); $$ = e;
} }
| T_REGEX_I { ;
ap_regex_t *regex; regsub : T_REGSUB T_REG_MATCH string T_REG_FLAGS {
if ((regex = ap_pregcomp(ctx->pool, $1, ap_expr_t *e = ap_expr_regex_make($2, $4, $3, 0, ctx);
AP_REG_EXTENDED|AP_REG_NOSUB|AP_REG_ICASE)) == NULL) { if (!e) {
ctx->error = "Failed to compile regular expression"; ctx->error = "Failed to compile regular expression";
YYERROR; YYERROR;
} }
$$ = ap_expr_make(op_Regex, regex, NULL, ctx); $$ = e;
} }
; ;
regsplit : T_OP_SPLIT T_REG_MATCH string T_REG_FLAGS {
/* Returns a list:
* <word> ~= split/://
* => split around ':', replace it with empty
* <word> ~= split/:/\n/
* => split around ':', replace it with '\n'
* <list> ~= split/.*?Ip Address:([^,]+)/$1/
* => split around the whole match, replace it with $1
*/
ap_expr_t *e = ap_expr_regex_make($2, $4, $3, 1, ctx);
if (!e) {
ctx->error = "Failed to compile regular expression";
YYERROR;
}
$$ = e;
}
;
regany : regex { $$ = $1; }
| regsub { $$ = $1; }
| regsplit { $$ = $1; }
;
backref : T_REGEX_BACKREF { regref : T_REG_REF {
int *n = apr_palloc(ctx->pool, sizeof(int)); int *n = apr_palloc(ctx->pool, sizeof(int));
*n = $1; *n = $1;
$$ = ap_expr_make(op_RegexBackref, n, NULL, ctx); $$ = ap_expr_make(op_Regref, n, NULL, ctx);
} }
; ;
lstfunccall : T_ID '(' word ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } lstfunc : T_ID '(' word ')' { $$ = ap_expr_list_func_make($1, $3, ctx); }
; /* | T_ID '(' words ')' { $$ = ap_expr_list_func_make($1, $3, ctx); } */
;
strfunccall : T_ID '(' word ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } strfunc : T_ID '(' word ')' { $$ = ap_expr_str_func_make($1, $3, ctx); }
| T_ID '(' words ')' { $$ = ap_expr_str_func_make($1, $3, ctx); } | T_ID '(' words ')' { $$ = ap_expr_str_func_make($1, $3, ctx); }
; ;
%% %%

View File

@ -54,9 +54,10 @@ typedef enum {
op_REG, op_NRE, op_REG, op_NRE,
op_STR_EQ, op_STR_NE, op_STR_LT, op_STR_LE, op_STR_GT, op_STR_GE, op_STR_EQ, op_STR_NE, op_STR_LT, op_STR_LE, op_STR_GT, op_STR_GE,
op_Concat, op_Concat,
op_Digit, op_String, op_Regex, op_RegexBackref, op_Digit, op_String,
op_Var, op_Var, op_Word, op_Bool, op_Join,
op_ListElement, op_Regex, op_Regsub, op_Regref,
op_ListElement, op_ListRegex,
/* /*
* call external functions/operators. * call external functions/operators.
* The info node contains the function pointer and some function specific * The info node contains the function pointer and some function specific
@ -79,6 +80,15 @@ struct ap_expr_node {
const void *node_arg2; const void *node_arg2;
}; };
/**
 * The stack used by scanner and parser.
 * One entry holds the text buffer of a construct being scanned; entries are
 * chained through 'next' (the scanner keeps a "current" chain and a chain
 * of "spares" it reuses before allocating a new entry).
 */
typedef struct ap_expr_parser_stack {
char *scan_ptr; /**< next write position inside scan_buf */
char scan_buf[MAX_STRING_LEN]; /**< characters accumulated so far (NUL added when returned) */
int scan_stop; /**< delimiter that ends the construct (quote or regex separator), '\0' if unset */
int scan_flag; /**< regex kind set by the scanner ('m', 's' or 'S' for split), 0 if unset */
struct ap_expr_parser_stack *next; /**< next entry in whichever chain this entry is on */
} ap_expr_parser_stack_t;
/** The context used by scanner and parser */ /** The context used by scanner and parser */
typedef struct { typedef struct {
/* internal state of the scanner */ /* internal state of the scanner */
@ -86,9 +96,8 @@ typedef struct {
int inputlen; int inputlen;
const char *inputptr; const char *inputptr;
void *scanner; void *scanner;
char *scan_ptr; ap_expr_parser_stack_t *current,
char scan_buf[MAX_STRING_LEN]; *spares;
char scan_del;
int at_start; int at_start;
/* pools for result and temporary usage */ /* pools for result and temporary usage */
@ -119,12 +128,23 @@ void ap_expr_yyset_extra(ap_expr_parse_ctx_t *context, void *scanner);
/* create a parse tree node */ /* create a parse tree node */
ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *arg1, ap_expr_t *ap_expr_make(ap_expr_node_op_e op, const void *arg1,
const void *arg2, ap_expr_parse_ctx_t *ctx); const void *arg2, ap_expr_parse_ctx_t *ctx);
ap_expr_t *ap_expr_concat_make(const void *a1, const void *a2,
ap_expr_parse_ctx_t *ctx);
ap_expr_t *ap_expr_str_word_make(const ap_expr_t *arg,
ap_expr_parse_ctx_t *ctx);
ap_expr_t *ap_expr_str_bool_make(const ap_expr_t *arg,
ap_expr_parse_ctx_t *ctx);
ap_expr_t *ap_expr_regex_make(const char *pattern, const char *flags,
const ap_expr_t *subst, int split,
ap_expr_parse_ctx_t *ctx);
/* create parse tree node for the string-returning function 'name' */ /* create parse tree node for the string-returning function 'name' */
ap_expr_t *ap_expr_str_func_make(const char *name, const ap_expr_t *arg, ap_expr_t *ap_expr_str_func_make(const char *name, const ap_expr_t *arg,
ap_expr_parse_ctx_t *ctx); ap_expr_parse_ctx_t *ctx);
/* create parse tree node for the list-returning function 'name' */ /* create parse tree node for the list-returning function 'name' */
ap_expr_t *ap_expr_list_func_make(const char *name, const ap_expr_t *arg, ap_expr_t *ap_expr_list_func_make(const char *name, const ap_expr_t *arg,
ap_expr_parse_ctx_t *ctx); ap_expr_parse_ctx_t *ctx);
ap_expr_t *ap_expr_list_regex_make(const ap_expr_t *lst, const ap_expr_t *re,
ap_expr_parse_ctx_t *ctx);
/* create parse tree node for the variable 'name' */ /* create parse tree node for the variable 'name' */
ap_expr_t *ap_expr_var_make(const char *name, ap_expr_parse_ctx_t *ctx); ap_expr_t *ap_expr_var_make(const char *name, ap_expr_parse_ctx_t *ctx);
/* create parse tree node for the unary operator 'name' */ /* create parse tree node for the unary operator 'name' */

File diff suppressed because it is too large Load Diff

View File

@ -34,16 +34,17 @@
%option warn %option warn
%option noinput nounput noyy_top_state %option noinput nounput noyy_top_state
%option stack %option stack
%x str
%x var %x str expr
%x vararg %x var vararg
%x regex regex_flags %x split regex regsub regflags
%{ %{
#include "util_expr_private.h" #include "util_expr_private.h"
#include "util_expr_parse.h" #include "util_expr_parse.h"
#include "http_main.h" #include "http_main.h"
#include "http_log.h" #include "http_log.h"
#include "apr_lib.h"
#undef YY_INPUT #undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \ #define YY_INPUT(buf,result,max_size) \
@ -65,38 +66,118 @@
* XXX: longjmp. It is not clear if the scanner is in any state * XXX: longjmp. It is not clear if the scanner is in any state
* XXX: to be cleaned up, though. * XXX: to be cleaned up, though.
*/ */
static int unreachable = 0;
#define YY_FATAL_ERROR(msg) \ #define YY_FATAL_ERROR(msg) \
do { \ do { \
ap_log_error(APLOG_MARK, APLOG_CRIT, 0, ap_server_conf, \ ap_log_error(APLOG_MARK, APLOG_CRIT, 0, ap_server_conf, \
APLOGNO(03296) \ APLOGNO(03296) \
"expr parser fatal error (BUG?): " \ "expr parser fatal error (BUG?): " \
"%s, exiting", msg); \ "%s, exiting", msg); \
abort(); \ if (unreachable) { \
/* Not reached, silence [-Wunused-function] */ \
yy_fatal_error(msg, yyscanner); \
} \
else { \
abort(); \
} \
} while (0) } while (0)
#define YY_EXTRA_TYPE ap_expr_parse_ctx_t* #define YY_EXTRA_TYPE ap_expr_parse_ctx_t*
#define PERROR(msg) do { yyextra->error2 = msg ; return T_ERROR; } while (0) #define PERROR(msg) do { \
yyextra->error2 = msg; \
return T_ERROR; \
} while (0)
#define str_ptr (yyextra->scan_ptr) #define PERROR_CHAR(prefix, chr) do { \
#define str_buf (yyextra->scan_buf) char *msg; \
#define str_del (yyextra->scan_del) if (apr_isprint((chr))) { \
msg = apr_psprintf(yyextra->pool, prefix "'%c'", (char)(chr)); \
} \
else { \
msg = apr_psprintf(yyextra->pool, prefix "'\\x%.2X'", (int)(chr)); \
} \
PERROR(msg); \
} while (0)
#define STR_APPEND(c) do { \ #define STACK_PUSH() do { \
*str_ptr++ = (c); \ ap_expr_parser_stack_t *sk; \
if (str_ptr >= str_buf + sizeof(str_buf)) \ if (yyextra->spares) { \
PERROR("String too long"); \ sk = yyextra->spares; \
} while (0) yyextra->spares = sk->next; \
} \
else { \
sk = apr_palloc(yyextra->ptemp, sizeof(*sk)); \
} \
sk->scan_ptr = sk->scan_buf; \
sk->scan_stop = sk->scan_buf[0] = '\0'; \
sk->scan_flag = 0; \
sk->next = yyextra->current; \
yyextra->current = sk; \
} while (0)
/*
 * Unlink the innermost buffer from the "current" chain and park it at the
 * head of the "spares" chain so that a later push can reuse it.
 * The caller must guarantee that yyextra->current is not NULL.
 */
#define STACK_POP() do { \
    ap_expr_parser_stack_t *top = yyextra->current; \
    yyextra->current = top->next; \
    top->next = yyextra->spares; \
    yyextra->spares = top; \
} while (0)
/*
 * Enter the scanner start condition `st`, remembering the current one on
 * flex's state stack.  When `sk` is non-zero a fresh string buffer is also
 * pushed (STACK_PUSH) for the construct scanned in the new state.
 */
#define STATE_PUSH(st, sk) do { \
    yy_push_state((st), yyscanner); \
    if ((sk) != 0) { \
        STACK_PUSH(); \
    } \
} while (0)
/*
 * Leave the current scanner start condition and return to the previous one
 * on flex's state stack.  When `sk` is non-zero the innermost string buffer
 * is released first (STACK_POP).
 */
#define STATE_POP(sk) do { \
    if ((sk) != 0) { \
        STACK_POP(); \
    } \
    yy_pop_state(yyscanner); \
} while (0)
/*
 * Shorthands for the fields of the innermost (current) scanner stack entry.
 * They dereference yyextra->current, so they are only usable while at least
 * one entry is pushed.
 */
#define str_ptr (yyextra->current->scan_ptr)
#define str_buf (yyextra->current->scan_buf)
#define str_stop (yyextra->current->scan_stop)
#define str_flag (yyextra->current->scan_flag)
/*
 * Append the character `chr` to the current string buffer.
 *
 * When `chk` is non-zero, control characters are refused with an
 * "Invalid string character" error.  In every case one byte is kept free
 * for the terminating NUL; a full buffer yields "String too long".
 * Both error paths return T_ERROR from the scanner (see PERROR), so the
 * character is stored only when neither applies.
 */
#define STR_APPEND_CHECK(chr, chk) do { \
    if ((chk) && apr_iscntrl((chr))) { \
        PERROR_CHAR("Invalid string character ", (chr)); \
    } \
    else if (str_ptr + 1 >= str_buf + sizeof(str_buf)) { \
        PERROR("String too long"); \
    } \
    else { \
        *str_ptr = (char)(chr); \
        str_ptr++; \
    } \
} while (0)
/* Append `chr` without the control-character check (the length check still applies) */
#define STR_APPEND_NOCHECK(chr) \
STR_APPEND_CHECK((chr), 0)
/* True when nothing has been accumulated in the current buffer */
#define STR_EMPTY() \
(str_ptr == str_buf)
/*
 * NUL-terminate the accumulated text, rewind str_ptr to the start of the
 * buffer (leaving it empty for the next piece) and evaluate to an
 * apr_pstrdup() duplicate of the text taken from yyextra->pool.
 * The order inside the comma expression matters: terminate, then rewind.
 */
#define STR_RETURN() \
(apr_pstrdup(yyextra->pool, (*str_ptr = '\0', str_ptr = str_buf)))
%} %}
ANY (.|\n)
SPACE [ \t\n]
QUOTE ["']
TOKEN ([a-zA-Z][a-zA-Z0-9_]*)
VAR_BEGIN (\%\{)
VAR_ARG (\:)
VAR_END (\})
VAREXP_BEGIN (\%\{\:)
VAREXP_END (\:\})
REG_SEP [/#$%^|?!'",;:._-]
REG_REF (\$[0-9])
%% %%
char regex_buf[MAX_STRING_LEN];
char *regex_ptr = NULL;
char regex_del = '\0';
%{ %{
/* /*
* Set initial state for string expressions * Set initial state for string expressions
@ -104,286 +185,306 @@
if (yyextra->at_start) { if (yyextra->at_start) {
yyextra->at_start = 0; yyextra->at_start = 0;
if (yyextra->flags & AP_EXPR_FLAG_STRING_RESULT) { if (yyextra->flags & AP_EXPR_FLAG_STRING_RESULT) {
BEGIN(str); STATE_PUSH(str, 1);
return T_EXPR_STRING; return T_EXPR_STRING;
} }
else { else {
STATE_PUSH(expr, 1);
return T_EXPR_BOOL; return T_EXPR_BOOL;
} }
} }
%} %}
/* /*
* Whitespaces * Back off INITIAL pushes
*/ */
[ \t\n]+ { <str><<EOF>> {
/* NOP */ STATE_POP(0); /* <str> */
if (YY_START != INITIAL) {
PERROR("Unterminated string");
}
yylval->cpVal = STR_RETURN();
STACK_POP(); /* ^ after this */
return T_STRING;
}
<expr><<EOF>> {
STATE_POP(1); /* <expr> */
if (YY_START != INITIAL) {
PERROR("Unterminated expression");
}
} }
/* <str>{QUOTE} {
* strings ("..." and '...') if (yytext[0] == str_stop) {
*/ if (!STR_EMPTY()) {
["'] { yyless(0); /* come back below */
str_ptr = str_buf; yylval->cpVal = STR_RETURN();
str_del = yytext[0];
BEGIN(str);
return T_STR_BEGIN;
}
<str>["'] {
if (yytext[0] == str_del) {
if (YY_START == var) {
PERROR("Unterminated variable in string");
}
else if (str_ptr == str_buf) {
BEGIN(INITIAL);
return T_STR_END;
}
else {
/* return what we have so far and scan delimiter again */
*str_ptr = '\0';
yylval->cpVal = apr_pstrdup(yyextra->pool, str_buf);
yyless(0);
str_ptr = str_buf;
return T_STRING; return T_STRING;
} }
STATE_POP(1); /* <str> */
return T_STR_END;
} }
else { STR_APPEND_NOCHECK(yytext[0]);
STR_APPEND(yytext[0]);
}
} }
<str,var,vararg>\n {
PERROR("Unterminated string or variable");
}
<var,vararg><<EOF>> {
PERROR("Unterminated string or variable");
}
<str><<EOF>> {
if (!(yyextra->flags & AP_EXPR_FLAG_STRING_RESULT)) {
PERROR("Unterminated string or variable");
}
else {
*str_ptr = '\0';
yylval->cpVal = apr_pstrdup(yyextra->pool, str_buf);
str_ptr = str_buf;
BEGIN(INITIAL);
return T_STRING;
}
}
<str,vararg>\\[0-7]{1,3} {
int result;
(void)sscanf(yytext+1, "%o", &result);
if (result > 0xff) {
PERROR("Escape sequence out of bound");
}
else {
STR_APPEND(result);
}
}
<str,vararg>\\[0-9]+ {
PERROR("Bad escape sequence");
}
<str,vararg>\\n { STR_APPEND('\n'); }
<str,vararg>\\r { STR_APPEND('\r'); }
<str,vararg>\\t { STR_APPEND('\t'); }
<str,vararg>\\b { STR_APPEND('\b'); }
<str,vararg>\\f { STR_APPEND('\f'); }
<str,vararg>\\(.|\n) { STR_APPEND(yytext[1]); }
/* regexp backref inside string/arg */ /* regexp backref inside string/arg */
<str,vararg>[$][0-9] { <str,vararg,regsub>{REG_REF} {
if (str_ptr != str_buf) { if (!STR_EMPTY()) {
/* return what we have so far and scan '$x' again */ yyless(0); /* come back below */
*str_ptr = '\0'; yylval->cpVal = STR_RETURN();
yylval->cpVal = apr_pstrdup(yyextra->pool, str_buf);
str_ptr = str_buf;
yyless(0);
return T_STRING; return T_STRING;
} }
else { yylval->num = yytext[1] - '0';
yylval->num = yytext[1] - '0'; return T_REG_REF;
return T_REGEX_BACKREF;
}
}
<str,vararg>[^\\\n"'%}$]+ {
char *cp = yytext;
while (*cp != '\0') {
STR_APPEND(*cp);
cp++;
}
} }
/* variable inside string/arg */ /* variable inside string/arg */
<str,vararg>%\{ { <str,vararg,regsub>{VAR_BEGIN} {
if (str_ptr != str_buf) { if (!STR_EMPTY()) {
/* return what we have so far and scan '%{' again */ yyless(0); /* come back below */
*str_ptr = '\0'; yylval->cpVal = STR_RETURN();
yylval->cpVal = apr_pstrdup(yyextra->pool, str_buf);
yyless(0);
str_ptr = str_buf;
return T_STRING; return T_STRING;
} }
else { STATE_PUSH(var, 1);
yy_push_state(var, yyscanner);
return T_VAR_BEGIN;
}
}
<vararg>[%$] {
STR_APPEND(yytext[0]);
}
<str>[%}$] {
STR_APPEND(yytext[0]);
}
%\{ {
yy_push_state(var, yyscanner);
return T_VAR_BEGIN; return T_VAR_BEGIN;
} }
[$][0-9] { <str,vararg,regsub>{VAREXP_BEGIN} {
yylval->num = yytext[1] - '0'; if (!STR_EMPTY()) {
return T_REGEX_BACKREF; yyless(0); /* come back below */
yylval->cpVal = STR_RETURN();
return T_STRING;
}
STATE_PUSH(expr, 1);
return T_VAREXP_BEGIN;
} }
/* /* Any non-octal or octal higher than 377 (decimal 255) is invalid */
* fixed name variable expansion %{XXX} and function call in %{func:arg} syntax <str,vararg,regsub>\\([4-9][0-9]{2}|[8-9][0-9]{0,2}) {
*/ PERROR("Bad character escape sequence");
<var>[a-ij-rs-zA-IJ-RS-Z][a-ij-rs-zA-IJ-RS-Z0-9_]* { }
<str,vararg,regsub>\\[0-7]{1,3} {
    /*
     * Octal escape: yytext is a backslash followed by one to three octal
     * digits (guaranteed by the pattern), so the value is accumulated
     * digit by digit in base 8.
     */
    int result = 0;
    const char *digit;
    for (digit = yytext + 1; *digit != '\0'; digit++) {
        result = result * 8 + (*digit - '0');
    }
    STR_APPEND_NOCHECK(result);
}
<str,vararg,regsub>\\x[0-9A-Fa-f]{1,2} {
    /*
     * Hexadecimal escape: yytext is "\x" followed by one or two hex digits
     * (guaranteed by the pattern).  The conversion must start after the
     * 'x': sscanf's %x accepts only an optional "0x"/"0X" prefix, so
     * scanning from the bare 'x' is a matching failure that leaves the
     * result unset.  %x also requires an unsigned int destination.
     */
    unsigned int result = 0;
    (void)sscanf(yytext+2, "%x", &result);
    STR_APPEND_NOCHECK(result);
}
<str,vararg,regsub>\\n { STR_APPEND_NOCHECK('\n'); /* newline */ }
<str,vararg,regsub>\\r { STR_APPEND_NOCHECK('\r'); /* carriage return */ }
<str,vararg,regsub>\\t { STR_APPEND_NOCHECK('\t'); /* horizontal tab */ }
<str,vararg,regsub>\\b { STR_APPEND_NOCHECK('\b'); /* backspace */ }
<str,vararg,regsub>\\f { STR_APPEND_NOCHECK('\f'); /* form feed */ }
<str,vararg,regsub>\\. { STR_APPEND_CHECK(yytext[1], 1); /* any other escaped char stands for itself; control chars are rejected */ }
<str,vararg,regsub>\n {
/* A raw (unescaped) newline is not accepted inside these constructs */
PERROR("Unterminated string or variable");
}
<str>{ANY} {
/* Ordinary string character: accumulate it, rejecting control characters */
STR_APPEND_CHECK(yytext[0], 1);
}
<expr>{SPACE}+ {
/* NOP */
/* Whitespace between expression tokens is skipped */
}
<expr>{QUOTE} {
/* Opening quote: scan a string with its own buffer and remember which
 * quote character has to close it */
STATE_PUSH(str, 1);
str_stop = yytext[0];
return T_STR_BEGIN;
}
<expr>{VAREXP_BEGIN} {
/* "%{:" opens a nested expression, closed by the matching ":}" below */
STATE_PUSH(expr, 1);
return T_VAREXP_BEGIN;
}
<expr>{VAREXP_END} {
/* ":}" closes the innermost nested expression */
STATE_POP(1); /* <expr> */
return T_VAREXP_END;
}
<expr>{VAR_BEGIN} {
/* "%{" opens a variable reference or function call */
STATE_PUSH(var, 1);
return T_VAR_BEGIN;
}
<var>{TOKEN} {
yylval->cpVal = apr_pstrdup(yyextra->pool, yytext); yylval->cpVal = apr_pstrdup(yyextra->pool, yytext);
return T_ID; return T_ID;
} }
<var>{VAR_ARG} {
<var>\} { STATE_PUSH(vararg, 0);
yy_pop_state(yyscanner);
return T_VAR_END;
}
<var>: {
BEGIN(vararg);
return yytext[0]; return yytext[0];
} }
<vararg>{VAR_END} {
<var>.|\n { yyless(0); /* let <var> handle */
char *msg = apr_psprintf(yyextra->pool, yylval->cpVal = STR_RETURN();
"Invalid character in variable name '%c'", yytext[0]); STATE_POP(0); /* <vararg> */
PERROR(msg); return T_STRING;
}
<vararg>{ANY} {
STR_APPEND_CHECK(yytext[0], 1);
}
<var>{VAR_END} {
STATE_POP(1); /* <var> */
return T_VAR_END;
}
<var>{ANY} {
PERROR_CHAR("Unexpected variable character ", yytext[0]);
}
<var,vararg><<EOF>> {
PERROR("Unterminated variable");
} }
<vararg>\} {
if (str_ptr != str_buf) {
/* return what we have so far and scan '}' again */
*str_ptr = '\0';
yylval->cpVal = apr_pstrdup(yyextra->pool, str_buf);
str_ptr = str_buf;
yyless(0);
return T_STRING;
}
else {
yy_pop_state(yyscanner);
return T_VAR_END;
}
}
/* /*
* Regular Expression * Regular Expression
*/ */
"m"[/#$%^,;:_\?\|\^\-\!\.\'\"] { <expr>\/ {
regex_del = yytext[1]; STATE_PUSH(regex, 1);
regex_ptr = regex_buf; str_stop = yytext[0];
BEGIN(regex); str_flag = 'm';
return T_REGEX;
} }
"/" { <expr>[ms]{REG_SEP} {
regex_del = yytext[0]; STATE_PUSH(regex, 1);
regex_ptr = regex_buf; str_stop = yytext[1];
BEGIN(regex); str_flag = yytext[0];
return (str_flag == 'm') ? T_REGEX : T_REGSUB;
} }
<regex>.|\n { <regex>{ANY} {
if (yytext[0] == regex_del) { if (yytext[0] == str_stop) {
*regex_ptr = '\0'; STATE_POP(0); /* <regex> */
BEGIN(regex_flags); if (str_flag == 'm') {
STATE_PUSH(regflags, 0);
}
else {
STATE_PUSH(regsub, 0);
}
yylval->cpVal = STR_RETURN();
return T_REG_MATCH;
}
STR_APPEND_CHECK(yytext[0], 1);
}
<regsub>{ANY} {
if (yytext[0] == str_stop) {
STATE_POP(0); /* <regsub> */
STATE_PUSH(regflags, 0);
} }
else { else {
*regex_ptr++ = yytext[0]; STR_APPEND_CHECK(yytext[0], 1);
if (regex_ptr >= regex_buf + sizeof(regex_buf))
PERROR("Regexp too long");
} }
} }
<regex_flags>i { <regflags>{ANY} {
yylval->cpVal = apr_pstrdup(yyextra->pool, regex_buf); if (ap_strchr_c("ismg", yytext[0])) {
BEGIN(INITIAL); STR_APPEND_NOCHECK(yytext[0]);
return T_REGEX_I; }
else if (apr_isalnum(yytext[0])) {
PERROR("Invalid regexp flag(s)");
}
else {
yyless(0); /* not a flags, rewind */
yylval->cpVal = STR_RETURN();
STATE_POP(1); /* <regflags> */
return T_REG_FLAGS;
}
} }
<regex_flags>.|\n { <regflags><<EOF>> {
yylval->cpVal = apr_pstrdup(yyextra->pool, regex_buf); yylval->cpVal = STR_RETURN();
yyless(0); STATE_POP(1); /* <regflags> */
BEGIN(INITIAL); return T_REG_FLAGS;
return T_REGEX;
} }
<regex_flags><<EOF>> { <regex,regsub><<EOF>> {
yylval->cpVal = apr_pstrdup(yyextra->pool, regex_buf); PERROR("Unterminated regexp");
BEGIN(INITIAL); }
return T_REGEX;
<expr>{REG_REF} {
yylval->num = yytext[1] - '0';
return T_REG_REF;
} }
/* /*
* Operators * Operators
*/ */
==? { return T_OP_STR_EQ; } <expr>==? { return T_OP_STR_EQ; }
"!=" { return T_OP_STR_NE; } <expr>"!=" { return T_OP_STR_NE; }
"<" { return T_OP_STR_LT; } <expr>"<" { return T_OP_STR_LT; }
"<=" { return T_OP_STR_LE; } <expr>"<=" { return T_OP_STR_LE; }
">" { return T_OP_STR_GT; } <expr>">" { return T_OP_STR_GT; }
">=" { return T_OP_STR_GE; } <expr>">=" { return T_OP_STR_GE; }
"=~" { return T_OP_REG; } <expr>"=~" { return T_OP_REG; }
"!~" { return T_OP_NRE; } <expr>"!~" { return T_OP_NRE; }
"and" { return T_OP_AND; } <expr>"and" { return T_OP_AND; }
"&&" { return T_OP_AND; } <expr>"&&" { return T_OP_AND; }
"or" { return T_OP_OR; } <expr>"or" { return T_OP_OR; }
"||" { return T_OP_OR; } <expr>"||" { return T_OP_OR; }
"not" { return T_OP_NOT; } <expr>"not" { return T_OP_NOT; }
"!" { return T_OP_NOT; } <expr>"!" { return T_OP_NOT; }
"." { return T_OP_CONCAT; } <expr>"." { return T_OP_CONCAT; }
"-in" { return T_OP_IN; } <expr>"-in" { return T_OP_IN; }
"-eq" { return T_OP_EQ; } <expr>"-eq" { return T_OP_EQ; }
"-ne" { return T_OP_NE; } <expr>"-ne" { return T_OP_NE; }
"-ge" { return T_OP_GE; } <expr>"-ge" { return T_OP_GE; }
"-le" { return T_OP_LE; } <expr>"-le" { return T_OP_LE; }
"-gt" { return T_OP_GT; } <expr>"-gt" { return T_OP_GT; }
"-lt" { return T_OP_LT; } <expr>"-lt" { return T_OP_LT; }
/* for compatibility with ssl_expr */ /* for compatibility with ssl_expr */
"lt" { return T_OP_LT; } <expr>"lt" { return T_OP_LT; }
"le" { return T_OP_LE; } <expr>"le" { return T_OP_LE; }
"gt" { return T_OP_GT; } <expr>"gt" { return T_OP_GT; }
"ge" { return T_OP_GE; } <expr>"ge" { return T_OP_GE; }
"ne" { return T_OP_NE; } <expr>"ne" { return T_OP_NE; }
"eq" { return T_OP_EQ; } <expr>"eq" { return T_OP_EQ; }
"in" { return T_OP_IN; } <expr>"in" { return T_OP_IN; }
"-"[a-ij-rs-zA-IJ-RS-Z_] { <expr>"-"[a-zA-Z_][a-zA-Z_0-9]+ {
yylval->cpVal = apr_pstrdup(yyextra->pool, yytext + 1);
return T_OP_BINARY;
}
<expr>"-"[a-zA-Z_] {
yylval->cpVal = apr_pstrdup(yyextra->pool, yytext + 1); yylval->cpVal = apr_pstrdup(yyextra->pool, yytext + 1);
return T_OP_UNARY; return T_OP_UNARY;
} }
"-"[a-ij-rs-zA-IJ-RS-Z_][a-ij-rs-zA-IJ-RS-Z_0-9]+ { /* Split a string (or list) into a(nother) list */
yylval->cpVal = apr_pstrdup(yyextra->pool, yytext + 1); <expr>"split" {
return T_OP_BINARY; STATE_PUSH(split, 0);
return T_OP_SPLIT;
}
<split>{REG_SEP} {
/* The separator right after "split" starts the pattern: swap the <split>
 * state for <regex> with a fresh buffer, record the separator as the
 * terminator and mark the regex as a split ('S').  No token is returned
 * here. */
STATE_POP(0); /* <split> */
STATE_PUSH(regex, 1);
str_stop = yytext[0];
str_flag = 'S';
}
<split>{ANY} {
/* Anything but a regex separator directly after "split" is an error */
PERROR("Expecting split regular expression");
}
<split><<EOF>> {
/* Input ended before the split pattern started */
PERROR("Unterminated split");
}
/* Join a list into a string */
<expr>"join" {
return T_OP_JOIN;
} }
/* /*
* Specials * Specials
*/ */
"true" { return T_TRUE; } <expr>"true" { return T_TRUE; }
"false" { return T_FALSE; } <expr>"false" { return T_FALSE; }
/* /*
* Digits * Digits
*/ */
-?[0-9]+ { <expr>\-?[0-9]+ {
yylval->cpVal = apr_pstrdup(yyextra->pool, yytext); yylval->cpVal = apr_pstrdup(yyextra->pool, yytext);
return T_DIGIT; return T_DIGIT;
} }
@ -391,7 +492,7 @@
/* /*
* Identifiers * Identifiers
*/ */
[a-ij-rs-zA-IJ-RS-Z][a-ij-rs-zA-IJ-RS-Z0-9_]* { <expr>{TOKEN} {
yylval->cpVal = apr_pstrdup(yyextra->pool, yytext); yylval->cpVal = apr_pstrdup(yyextra->pool, yytext);
return T_ID; return T_ID;
} }
@ -399,16 +500,15 @@
/* /*
* These are parts of the grammar and are returned as is * These are parts of the grammar and are returned as is
*/ */
[(){},:] { <expr>[(){},] {
return yytext[0]; return yytext[0];
} }
/* /*
* Anything else is an error * Anything else is an error
*/ */
.|\n { <INITIAL,expr>{ANY} {
char *msg = apr_psprintf(yyextra->pool, "Parse error near '%c'", yytext[0]); PERROR_CHAR("Parse error near character ", yytext[0]);
PERROR(msg);
} }
%% %%

View File

@ -246,6 +246,10 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff,
options |= PCREn(NOTBOL); options |= PCREn(NOTBOL);
if ((eflags & AP_REG_NOTEOL) != 0) if ((eflags & AP_REG_NOTEOL) != 0)
options |= PCREn(NOTEOL); options |= PCREn(NOTEOL);
if ((eflags & AP_REG_NOTEMPTY) != 0)
options |= PCREn(NOTEMPTY);
if ((eflags & AP_REG_ANCHORED) != 0)
options |= PCREn(ANCHORED);
#ifdef HAVE_PCRE2 #ifdef HAVE_PCRE2
/* TODO: create a generic TLS matchdata buffer of some nmatch limit, /* TODO: create a generic TLS matchdata buffer of some nmatch limit,