From d8848084e32dfcf00b7cc6b8cd4f12de3cd1426e Mon Sep 17 00:00:00 2001 From: Yann Ylavic Date: Mon, 22 Jun 2020 10:37:41 +0000 Subject: [PATCH] Allow for proxy servlet mapping at pre_translate_name stage. Provide alias_match_servlet(), the servlet counterpart of alias_match(), which maps the request URI-path to the ProxyPass alias ignoring path parameters, while still forwarding them (above the alias). This is needed to proxy servlet URIs for application handled by Tomcat, which can then make use of the path/segments parameters. Github: closes #128 git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1879080 13f79535-47bb-0310-9956-ffa450edef68 --- modules/proxy/mod_proxy.c | 253 ++++++++++++++++++++++++++++++++++++-- modules/proxy/mod_proxy.h | 4 + 2 files changed, 247 insertions(+), 10 deletions(-) diff --git a/modules/proxy/mod_proxy.c b/modules/proxy/mod_proxy.c index edbf1bcc94..3e2e3e4319 100644 --- a/modules/proxy/mod_proxy.c +++ b/modules/proxy/mod_proxy.c @@ -570,6 +570,198 @@ static int alias_match(const char *uri, const char *alias_fakename) return urip - uri; } +/* + * Inspired by mod_jk's jk_servlet_normalize(). + */ +static int alias_match_servlet(apr_pool_t *p, + const char *uri, + const char *alias) +{ + char *map; + apr_array_header_t *stack; + int map_pos, uri_pos, alias_pos, first_pos; + int alias_depth = 0, depth; + + /* Both uri and alias should start with '/' */ + if (uri[0] != '/' || alias[0] != '/') { + return 0; + } + + stack = apr_array_make(p, 5, sizeof(int)); + map = apr_palloc(p, strlen(uri) + 1); + map[0] = '/'; + map[1] = '\0'; + + map_pos = uri_pos = alias_pos = first_pos = 1; + while (uri[uri_pos] != '\0') { + /* Remove path parameters ;foo=bar/ from any path segment */ + if (uri[uri_pos] == ';') { + do { + uri_pos++; + } while (uri[uri_pos] != '/' && uri[uri_pos] != '\0'); + continue; + } + + if (map[map_pos - 1] == '/') { + /* Collapse ///// sequences to / */ + if (uri[uri_pos] == '/') { + do { + uri_pos++; + } while (uri[uri_pos] == '/'); + continue; + } + + if (uri[uri_pos] == '.') { + /* Remove /./ segments */ + if (uri[uri_pos + 1] == '/' + || uri[uri_pos + 1] == ';' + || uri[uri_pos + 1] == '\0') { + uri_pos++; + if (uri[uri_pos] == '/') { + uri_pos++; + } + continue; + } + + /* Remove /xx/../ segments */ + if (uri[uri_pos + 1] == '.' + && (uri[uri_pos + 2] == '/' + || uri[uri_pos + 2] == ';' + || uri[uri_pos + 2] == '\0')) { + /* Wind map segment back the previous one */ + if (map_pos == 1) { + /* Above root */ + return 0; + } + do { + map_pos--; + } while (map[map_pos - 1] != '/'); + map[map_pos] = '\0'; + + /* Wind alias segment back, unless in deeper segment */ + if (alias_depth == stack->nelts) { + if (alias[alias_pos] == '\0') { + alias_pos--; + } + while (alias_pos > 0 && alias[alias_pos] == '/') { + alias_pos--; + } + while (alias_pos > 0 && alias[alias_pos - 1] != '/') { + alias_pos--; + } + AP_DEBUG_ASSERT(alias_pos > 0); + alias_depth--; + } + apr_array_pop(stack); + + /* Move uri forward to the next segment */ + uri_pos += 2; + if (uri[uri_pos] == '/') { + uri_pos++; + } + first_pos = 0; + continue; + } + } + if (first_pos) { + while (uri[first_pos] == '/') { + first_pos++; + } + } + + /* New segment */ + APR_ARRAY_PUSH(stack, int) = first_pos ? first_pos : uri_pos; + if (alias[alias_pos] != '\0') { + if (alias[alias_pos - 1] != '/') { + /* Remain in pair with uri segments */ + do { + alias_pos++; + } while (alias[alias_pos - 1] != '/' && alias[alias_pos]); + } + while (alias[alias_pos] == '/') { + alias_pos++; + } + if (alias[alias_pos] != '\0') { + alias_depth++; + } + } + } + + if (alias[alias_pos] != '\0') { + int *match = &APR_ARRAY_IDX(stack, alias_depth - 1, int); + if (*match) { + if (alias[alias_pos] != uri[uri_pos]) { + /* Current segment does not match */ + *match = 0; + } + else if (alias[alias_pos + 1] == '\0' + && alias[alias_pos] != '/') { + if (uri[uri_pos + 1] == ';') { + /* We'll preserve the parameters of the last + * segment if it does not end with '/', so mark + * the match as negative for below handling. + */ + *match = -(uri_pos + 1); + } + else if (uri[uri_pos + 1] != '/' + && uri[uri_pos + 1] != '\0') { + /* Last segment does not match all the way */ + *match = 0; + } + } + } + /* Don't go past the segment if the uri isn't there yet */ + if (alias[alias_pos] != '/' || uri[uri_pos] == '/') { + alias_pos++; + } + } + + if (uri[uri_pos] == '/') { + first_pos = uri_pos + 1; + } + map[map_pos++] = uri[uri_pos++]; + map[map_pos] = '\0'; + } + + /* Can't reach the end of uri before the end of the alias, + * for example if uri is "/" and alias is "/examples" + */ + if (alias[alias_pos] != '\0') { + return 0; + } + + /* Check whether each alias segment matched */ + for (depth = 0; depth < alias_depth; ++depth) { + if (!APR_ARRAY_IDX(stack, depth, int)) { + return 0; + } + } + + /* If alias_depth == stack->nelts we have a full match, i.e. + * uri == alias so we can return uri_pos as is (the end of uri) + */ + if (alias_depth < stack->nelts) { + /* Return the segment following the alias */ + uri_pos = APR_ARRAY_IDX(stack, alias_depth, int); + if (alias_depth) { + /* But if the last segment of the alias does not end with '/' + * and the corresponding segment of the uri has parameters, + * we want to forward those parameters (see above for the + * negative pos trick/mark). + */ + int pos = APR_ARRAY_IDX(stack, alias_depth - 1, int); + if (pos < 0) { + uri_pos = -pos; + } + } + } + /* If the alias lacks a trailing slash, take it from the uri (if any) */ + if (alias[alias_pos - 1] != '/' && uri[uri_pos - 1] == '/') { + uri_pos--; + } + return uri_pos; +} + /* Detect if an absoluteURI should be proxied or not. Note that we * have to do this during this phase because later phases are * "short-circuiting"... i.e. translate_names will end when the first @@ -740,7 +932,18 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_rec *r, struct proxy_alias *ent, } } else { - len = alias_match(r->uri, fake); + /* Ignore servlet mapping if r->uri was decoded already, or we + * might consider for instance that an original %3B is a delimiter + * for path parameters (which is not). + */ + if (!dconf->mapping_decoded + && (ent->flags & PROXYPASS_MAPPING_SERVLET)) { + nocanon = 0; /* ignored since servlet's normalization applies */ + len = alias_match_servlet(r->pool, r->uri, fake); + } + else { + len = alias_match(r->uri, fake); + } if (len != 0) { if ((real[0] == '!') && (real[1] == '\0')) { @@ -795,7 +998,7 @@ PROXY_DECLARE(int) ap_proxy_trans_match(request_rec *r, struct proxy_alias *ent, return DONE; } -static int proxy_trans(request_rec *r) +static int proxy_trans(request_rec *r, int pre_trans) { int i; struct proxy_alias *ent; @@ -809,6 +1012,20 @@ static int proxy_trans(request_rec *r) return OK; } + /* In early pre_trans hook, r->uri was not manipulated yet so we are + * compliant with RFC1945 at this point. Otherwise, it probably isn't + * an issue because this is a hybrid proxy/origin server. + */ + + dconf = ap_get_module_config(r->per_dir_config, &proxy_module); + + /* Do the work from the hook corresponding to the ProxyMappingDecoded + * configuration (on/default: translate hook, off: pre_translate hook). + */ + if (pre_trans ^ !dconf->mapping_decoded) { + return DECLINED; + } + if ((r->unparsed_uri[0] == '*' && r->unparsed_uri[1] == '\0') || !r->uri || r->uri[0] != '/') { return DECLINED; @@ -818,13 +1035,6 @@ static int proxy_trans(request_rec *r) return DECLINED; } - /* XXX: since r->uri has been manipulated already we're not really - * compliant with RFC1945 at this point. But this probably isn't - * an issue because this is a hybrid proxy/origin server. - */ - - dconf = ap_get_module_config(r->per_dir_config, &proxy_module); - /* short way - this location is reverse proxied? */ if (dconf->alias) { int rv = ap_proxy_trans_match(r, dconf->alias, dconf); @@ -846,9 +1056,19 @@ static int proxy_trans(request_rec *r) } } } + return DECLINED; } +static int proxy_pre_translate_name(request_rec *r) +{ + return proxy_trans(r, 1); +} +static int proxy_translate_name(request_rec *r) +{ + return proxy_trans(r, 0); +} + static int proxy_walk(request_rec *r) { proxy_server_conf *sconf = ap_get_module_config(r->server->module_config, @@ -1593,6 +1813,7 @@ static void *create_proxy_dir_config(apr_pool_t *p, char *dummy) new->add_forwarded_headers_set = 0; new->forward_100_continue = 1; new->forward_100_continue_set = 0; + new->mapping_decoded = -1; /* unset (on by default) */ return (void *) new; } @@ -1651,6 +1872,9 @@ static void *merge_proxy_dir_config(apr_pool_t *p, void *basev, void *addv) new->forward_100_continue_set = add->forward_100_continue_set || base->forward_100_continue_set; + new->mapping_decoded = (add->mapping_decoded == -1) ? base->mapping_decoded + : add->mapping_decoded; + return new; } @@ -1806,6 +2030,9 @@ static const char * else if (!strcasecmp(word,"noquery")) { flags |= PROXYPASS_NOQUERY; } + else if (!strcasecmp(word,"mapping=servlet")) { + flags |= PROXYPASS_MAPPING_SERVLET; + } else { char *val = strchr(word, '='); if (!val) { @@ -2799,6 +3026,9 @@ static const command_rec proxy_cmds[] = AP_INIT_FLAG("Proxy100Continue", forward_100_continue, NULL, RSRC_CONF|ACCESS_CONF, "on if 100-Continue should be forwarded to the origin server, off if the " "proxy should handle it by itself"), + AP_INIT_FLAG("ProxyMappingDecoded", ap_set_flag_slot_char, + (void*)APR_OFFSETOF(proxy_dir_conf, mapping_decoded), RSRC_CONF|ACCESS_CONF, + "Whether to percent-decode before URI-path mapping"), {NULL} }; @@ -3147,7 +3377,10 @@ static void register_hooks(apr_pool_t *p) /* handler */ ap_hook_handler(proxy_handler, NULL, NULL, APR_HOOK_FIRST); /* filename-to-URI translation */ - ap_hook_translate_name(proxy_trans, aszSucc, NULL, APR_HOOK_FIRST); + ap_hook_pre_translate_name(proxy_pre_translate_name, NULL, NULL, + APR_HOOK_MIDDLE); + ap_hook_translate_name(proxy_translate_name, aszSucc, NULL, + APR_HOOK_FIRST); /* walk entries and suppress default TRACE behavior */ ap_hook_map_to_storage(proxy_map_location, NULL,NULL, APR_HOOK_FIRST); /* fixups */ diff --git a/modules/proxy/mod_proxy.h b/modules/proxy/mod_proxy.h index 4e8c6e07af..96cc8c26ed 100644 --- a/modules/proxy/mod_proxy.h +++ b/modules/proxy/mod_proxy.h @@ -123,6 +123,7 @@ struct proxy_remote { #define PROXYPASS_NOCANON 0x01 #define PROXYPASS_INTERPOLATE 0x02 #define PROXYPASS_NOQUERY 0x04 +#define PROXYPASS_MAPPING_SERVLET 0x08 struct proxy_alias { const char *real; const char *fake; @@ -243,6 +244,9 @@ typedef struct { unsigned int forward_100_continue_set:1; apr_array_header_t *error_override_codes; + + /** Whether to map percent-decoded URI-path */ + char mapping_decoded; } proxy_dir_conf; /* if we interpolate env vars per-request, we'll need a per-request