/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq#include "apr.h"
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq#include "apr_lib.h"
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq#include "apr_pools.h"
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq#include "apr_strings.h"
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq#include "ap_config.h"
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq#include "ap_regex.h"
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq#include "httpd.h"
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq
21fadb63e226ab604c422238ea1a53cbc38974d0trawickstatic apr_status_t rxplus_cleanup(void *preg)
21fadb63e226ab604c422238ea1a53cbc38974d0trawick{
21fadb63e226ab604c422238ea1a53cbc38974d0trawick ap_regfree((ap_regex_t *) preg);
21fadb63e226ab604c422238ea1a53cbc38974d0trawick return APR_SUCCESS;
21fadb63e226ab604c422238ea1a53cbc38974d0trawick}
21fadb63e226ab604c422238ea1a53cbc38974d0trawick
/* Compile a perl-style pattern string into an ap_rxplus_t.
 *
 * Accepted forms (any non-alphanumeric character may act as delimiter):
 *     s/rx/subs/flags   substitute
 *     m/rx/flags        match
 *     /rx/flags         match
 * If no closing delimiter is found, the whole of `pattern` is compiled as a
 * plain regex with no flags, and the result has no match storage
 * (nmatch and pmatch keep their zeroed values).
 *
 * Flag characters following the final delimiter:
 *     i -> AP_REG_ICASE     m -> AP_REG_NEWLINE   n -> AP_REG_NOMEM
 *     g -> AP_REG_MULTI     s -> AP_REG_DOTALL
 *     ^ -> AP_REG_NOTBOL    $ -> AP_REG_NOTEOL
 * Unknown flag characters are silently ignored.
 *
 * pool:    pool for the result, its strings and match array; a cleanup
 *          that frees the compiled regex is registered on it.
 * pattern: the pattern string; NULL yields NULL.
 *
 * Returns the compiled object, or NULL when `pattern` is NULL, the regex
 * fails to compile, or a substitution lacks its replacement string.
 */
AP_DECLARE(ap_rxplus_t*) ap_rxplus_compile(apr_pool_t *pool,
                                           const char *pattern)
{
    /* perl style patterns
     * add support for more as and when wanted
     * substitute: s/rx/subs/
     * match: m/rx/ or just /rx/
     */

    /* allow any nonalnum delimiter as first or second char.
     * If we ever use this with non-string pattern we'll need an extra check
     */
    const char *endp = NULL;
    const char *str = pattern;
    const char *rxstr;
    ap_rxplus_t *ret;
    char delim = 0;
    enum { SUBSTITUTE = 's', MATCH = 'm'} action = MATCH;

    if (pattern == NULL) {
        return NULL;
    }
    ret = apr_pcalloc(pool, sizeof(ap_rxplus_t));

    if (!apr_isalnum(pattern[0])) {
        delim = *str++;
    }
    else if (pattern[0] == 's' && !apr_isalnum(pattern[1])) {
        action = SUBSTITUTE;
        delim = pattern[1];
        str += 2;
    }
    else if (pattern[0] == 'm' && !apr_isalnum(pattern[1])) {
        delim = pattern[1];
        str += 2;
    }
    /* TODO: support perl's after/before */
    /* FIXME: fix these simpleminded delims */

    /* we think there's a delimiter.  Allow for it not to be if unmatched.
     * A delim of 0 (empty pattern, or a lone "s"/"m") means there is none,
     * and str is not dereferenced in that case.
     */
    if (delim) {
        endp = ap_strchr_c(str, delim);
    }
    if (!endp) { /* there's no delim or flags */
        if (ap_regcomp(&ret->rx, pattern, 0) == 0) {
            apr_pool_cleanup_register(pool, &ret->rx, rxplus_cleanup,
                                      apr_pool_cleanup_null);
            return ret;
        }
        else {
            return NULL;
        }
    }

    /* We have a delimiter.  Use it to extract the regexp */
    rxstr = apr_pstrndup(pool, str, endp-str);

    /* If it's a substitution, we need the replacement string
     * TODO: possible future enhancement - support other parsing
     * in the replacement string.
     */
    if (action == SUBSTITUTE) {
        str = endp+1;
        if (!*str || (endp = ap_strchr_c(str, delim), !endp)) {
            /* missing replacement string is an error */
            return NULL;
        }
        ret->subs = apr_pstrndup(pool, str, (endp-str));
    }

    /* anything after the current delimiter is flags */
    while (*++endp) {
        switch (*endp) {
        case 'i': ret->flags |= AP_REG_ICASE; break;
        case 'm': ret->flags |= AP_REG_NEWLINE; break;
        case 'n': ret->flags |= AP_REG_NOMEM; break;
        case 'g': ret->flags |= AP_REG_MULTI; break;
        case 's': ret->flags |= AP_REG_DOTALL; break;
        case '^': ret->flags |= AP_REG_NOTBOL; break;
        case '$': ret->flags |= AP_REG_NOTEOL; break;
        default: break; /* we should probably be stricter here */
        }
    }
    if (ap_regcomp(&ret->rx, rxstr, ret->flags) == 0) {
        apr_pool_cleanup_register(pool, &ret->rx, rxplus_cleanup,
                                  apr_pool_cleanup_null);
    }
    else {
        return NULL;
    }
    if (!(ret->flags & AP_REG_NOMEM)) {
        /* count size of memory required, starting at 1 for the whole-match
         * Simpleminded should be fine 'cos regcomp already checked syntax
         * (every unescaped '(' is counted, so this may over-allocate)
         */
        const char *scan = rxstr;

        ret->nmatch = 1;
        while (*scan) {
            switch (*scan++) {
            case '\\':  /* next char is escaped - skip it */
                if (*scan != 0) {
                    ++scan;
                }
                break;
            case '(':   /* unescaped bracket implies memory */
                ++ret->nmatch;
                break;
            default:
                break;
            }
        }
    }
    else if (ret->subs) {
        /* ap_rxplus_exec() reads pmatch[0] to locate the text it replaces,
         * so a substitution needs the whole-match slot even when the 'n'
         * flag asks for no backreference memory.  Without it pmatch would
         * stay NULL and the substitution would dereference it.
         */
        ret->nmatch = 1;
    }
    if (ret->nmatch > 0) {
        ret->pmatch = apr_palloc(pool, ret->nmatch*sizeof(ap_regmatch_t));
    }
    return ret;
}
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq
/* Run a compiled ap_rxplus_t against `pattern`, substituting if rx has a
 * replacement string.
 *
 * pool:       pool used for every string built here.
 * rx:         compiled object; its match pointer and pmatch array are
 *             updated by this call (and by each recursive call under
 *             AP_REG_MULTI).
 * pattern:    the subject string.
 * newpattern: receives the substituted string when rx->subs is set and the
 *             regex matched; not written otherwise.
 *
 * Returns 0 when the regex does not match or ap_pregsub() returns NULL;
 * otherwise 1, plus the number of further substitutions performed on the
 * rest of the string when AP_REG_MULTI is set.
 *
 * NOTE(review): when rx->subs is set this reads rx->pmatch[0], so rx must
 * have been compiled with match storage - confirm against the compiler.
 */
AP_DECLARE(int) ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx,
                               const char *pattern, char **newpattern)
{
    int ret = 1;
    int startl, oldl;
    size_t newl, taill;
    const char *remainder;
    char *subs;

    /* snrf process_regexp from mod_headers */
    if (ap_regexec(&rx->rx, pattern, rx->nmatch, rx->pmatch, rx->flags) != 0) {
        rx->match = NULL;
        return 0; /* no match, nothing to do */
    }
    rx->match = pattern;
    if (rx->subs) {
        *newpattern = ap_pregsub(pool, rx->subs, pattern,
                                 rx->nmatch, rx->pmatch);
        if (!*newpattern) {
            return 0; /* FIXME - should we do more to handle error? */
        }
        /* Capture the offsets now: the recursion below reuses rx->pmatch */
        startl = rx->pmatch[0].rm_so;
        oldl = rx->pmatch[0].rm_eo - startl;
        newl = strlen(*newpattern);
        remainder = pattern + startl + oldl;
        if (rx->flags & AP_REG_MULTI) {
            if (oldl > 0) {
                /* recurse to do any further matches */
                int more = ap_rxplus_exec(pool, rx, remainder, &subs);
                if (more > 0) {
                    /* a further substitution happened */
                    ret += more;
                    remainder = subs;
                }
            }
            else if (*remainder != '\0') {
                /* Empty match: recursing on the same position would match
                 * the same empty string forever.  Keep one character
                 * verbatim and continue after it.
                 */
                char *rest = NULL;
                int more = ap_rxplus_exec(pool, rx, remainder + 1, &rest);
                if (more > 0) {
                    size_t restl = strlen(rest);

                    ret += more;
                    subs = apr_palloc(pool, restl + 2);
                    subs[0] = *remainder;
                    memcpy(subs + 1, rest, restl + 1);
                    remainder = subs;
                }
            }
            /* empty match at end of string: nothing left to scan */
        }
        /* result = text before the match + replacement + processed tail */
        taill = strlen(remainder);
        subs = apr_palloc(pool, (size_t)startl + newl + taill + 1);
        memcpy(subs, pattern, startl);
        memcpy(subs + startl, *newpattern, newl);
        memcpy(subs + startl + newl, remainder, taill + 1);
        *newpattern = subs;
    }
    return ret;
}
#ifdef DOXYGEN
/* Number of match slots available from the last ap_rxplus_exec():
 * rx->nmatch while rx->match is set, 0 otherwise.
 * This definition is only compiled when DOXYGEN is defined; presumably the
 * real one is supplied by the header - verify there.
 */
AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx)
{
    if (rx->match == NULL) {
        return 0;
    }
    return rx->nmatch;
}
#endif
ac45a43afbf38aa4a91c1402c6beef6ef8a2696dniq
/* Fetch match number n from the last ap_rxplus_exec() without copying.
 *
 * On success *match points into the subject string remembered in rx->match
 * and *len is the length of that match.  When n is negative or not below
 * ap_rxplus_nmatch(rx), *len is set to -1 and *match to NULL.
 *
 * If this blows up on you, see the notes in the header/apidoc:
 * rx->match is a pointer and it's your responsibility to ensure
 * it hasn't gone out-of-scope since the last ap_rxplus_exec
 */
AP_DECLARE(void) ap_rxplus_match(ap_rxplus_t *rx, int n, int *len,
                                 const char **match)
{
    if (n < 0 || n >= ap_rxplus_nmatch(rx)) {
        /* no such match available */
        *len = -1;
        *match = NULL;
        return;
    }

    *match = rx->match + rx->pmatch[n].rm_so;
    *len = rx->pmatch[n].rm_eo - rx->pmatch[n].rm_so;
}
/* Return match number n from the last ap_rxplus_exec() as a copy made with
 * apr_pstrndup() in `pool`.
 *
 * The pointer and length come straight from ap_rxplus_match(); for an
 * unavailable n those are NULL and -1, and are passed on unchanged
 * (presumably apr_pstrndup() yields NULL for a NULL source - confirm).
 */
AP_DECLARE(char*) ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n)
{
    const char *start;
    int length;

    ap_rxplus_match(rx, n, &length, &start);
    return apr_pstrndup(pool, start, length);
}