mod_proxy_html.c revision 1861c6caddf9043aff6d7b1a9e2ffb515be652ff
af84459fbf938e508fd10b01cb8d699c79083813takashi/* Copyright (c) 2003-11, WebThing Ltd
af84459fbf938e508fd10b01cb8d699c79083813takashi * Copyright (c) 2011-, The Apache Software Foundation
af84459fbf938e508fd10b01cb8d699c79083813takashi * Licensed to the Apache Software Foundation (ASF) under one or more
af84459fbf938e508fd10b01cb8d699c79083813takashi * contributor license agreements. See the NOTICE file distributed with
af84459fbf938e508fd10b01cb8d699c79083813takashi * this work for additional information regarding copyright ownership.
af84459fbf938e508fd10b01cb8d699c79083813takashi * The ASF licenses this file to You under the Apache License, Version 2.0
af84459fbf938e508fd10b01cb8d699c79083813takashi * (the "License"); you may not use this file except in compliance with
af84459fbf938e508fd10b01cb8d699c79083813takashi * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * Unless required by applicable law or agreed to in writing, software
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * distributed under the License is distributed on an "AS IS" BASIS,
af84459fbf938e508fd10b01cb8d699c79083813takashi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
af84459fbf938e508fd10b01cb8d699c79083813takashi * See the License for the specific language governing permissions and
af84459fbf938e508fd10b01cb8d699c79083813takashi * limitations under the License.
 */
af84459fbf938e508fd10b01cb8d699c79083813takashi/* GO_FASTER
af84459fbf938e508fd10b01cb8d699c79083813takashi You can #define GO_FASTER to disable trace logging.
 */
af84459fbf938e508fd10b01cb8d699c79083813takashi/* libxml2 */
af84459fbf938e508fd10b01cb8d699c79083813takashi/* globals set once at startup */
/* Optional function-pointer hooks, both initialised to NULL.
 * The visible uses in this file test them against NULL before relying on
 * them, so a NULL value means "hook not available".
 * NOTE(review): the xml2enc_ prefix suggests these are supplied by
 * mod_xml2enc; the code that assigns them is not visible here -- confirm.
 */
af84459fbf938e508fd10b01cb8d699c79083813takashistatic apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL;
af84459fbf938e508fd10b01cb8d699c79083813takashistatic apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL;
af84459fbf938e508fd10b01cb8d699c79083813takashitypedef struct {
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* val;
af84459fbf938e508fd10b01cb8d699c79083813takashitypedef struct {
af84459fbf938e508fd10b01cb8d699c79083813takashi unsigned int start;
af84459fbf938e508fd10b01cb8d699c79083813takashi unsigned int end;
af84459fbf938e508fd10b01cb8d699c79083813takashitypedef struct urlmap {
af84459fbf938e508fd10b01cb8d699c79083813takashi unsigned int flags;
af84459fbf938e508fd10b01cb8d699c79083813takashi unsigned int regflags;
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* c;
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* to;
af84459fbf938e508fd10b01cb8d699c79083813takashitypedef struct {
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* doctype;
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* etag;
3c13a815670b54d1c17bf02954f7d2b066cde95cnd unsigned int flags;
3c13a815670b54d1c17bf02954f7d2b066cde95cnd const char* charset_out;
af84459fbf938e508fd10b01cb8d699c79083813takashitypedef struct {
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* encoding;
/* Rewrite class assigned to an attribute.
 * NOTE(review): the enumerator names suggest "leave the value alone",
 * "value is a URI" and "value is a scripting event handler"; the code that
 * consumes these values is not visible here -- confirm before relying on it.
 */
af84459fbf938e508fd10b01cb8d699c79083813takashitypedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t;
/* DOCTYPE declarations (each terminated by a newline) for the four supported
 * document types: HTML 4.01 strict/transitional and XHTML 1.0
 * strict/transitional.
 * These are compared by POINTER elsewhere in this file
 * (e.g. `ctx->cfg->doctype == fpi_html`), not with strcmp(), so a configured
 * doctype only matches when it holds one of these exact pointers rather than
 * a copy of the text.
 */
3c13a815670b54d1c17bf02954f7d2b066cde95cndstatic const char* const fpi_html =
af84459fbf938e508fd10b01cb8d699c79083813takashi "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n";
af84459fbf938e508fd10b01cb8d699c79083813takashistatic const char* const fpi_html_legacy =
af84459fbf938e508fd10b01cb8d699c79083813takashi "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
af84459fbf938e508fd10b01cb8d699c79083813takashistatic const char* const fpi_xhtml =
3c13a815670b54d1c17bf02954f7d2b066cde95cnd "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n";
af84459fbf938e508fd10b01cb8d699c79083813takashistatic const char* const fpi_xhtml_legacy =
af84459fbf938e508fd10b01cb8d699c79083813takashi "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
af84459fbf938e508fd10b01cb8d699c79083813takashi/*#define DEFAULT_DOCTYPE fpi_html */
af84459fbf938e508fd10b01cb8d699c79083813takashi for (p = str; *p; ++p)
af84459fbf938e508fd10b01cb8d699c79083813takashi for (p = ap_strchr(str, '\\'); p; p = ap_strchr(p+1, '\\'))
af84459fbf938e508fd10b01cb8d699c79083813takashi/* This is always utf-8 on entry. We can convert charset within FLUSH */
/* FLUSH expands to TWO statements: an AP_fwrite() call that is handed the
 * pointer chars+begin and the length i-begin, followed by `begin = i+1`.
 * It takes no arguments and picks up `ctx`, `chars`, `begin` and `i` from the
 * expansion site, so all four names must be in scope wherever it is used.
 * NOTE(review): the expansion is not wrapped in do { } while (0); if it were
 * used as the unbraced body of an if/else/loop, only the AP_fwrite() call
 * would be governed by the condition.
 */
af84459fbf938e508fd10b01cb8d699c79083813takashi#define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0); begin = i+1
af84459fbf938e508fd10b01cb8d699c79083813takashistatic void pcharacters(void* ctxt, const xmlChar *uchars, int length)
af84459fbf938e508fd10b01cb8d699c79083813takashi switch (chars[i]) {
/* HTML-escape the four markup-significant characters.  For each one, FLUSH
 * first hands the pending run of literal text to AP_fwrite and moves `begin`
 * past the current character; the entity reference is then written in place
 * of the raw character.  The replacement strings must be the entity names:
 * emitting the raw character would leave the output unescaped.
 */
af84459fbf938e508fd10b01cb8d699c79083813takashi case '&' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&amp;"); break;
af84459fbf938e508fd10b01cb8d699c79083813takashi case '<' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&lt;"); break;
af84459fbf938e508fd10b01cb8d699c79083813takashi case '>' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&gt;"); break;
af84459fbf938e508fd10b01cb8d699c79083813takashi case '"' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&quot;"); break;
af84459fbf938e508fd10b01cb8d699c79083813takashi default : break;
af84459fbf938e508fd10b01cb8d699c79083813takashi (int(*)(void*))free);
af84459fbf938e508fd10b01cb8d699c79083813takashistatic void pappend(saxctxt* ctx, const char* buf, const size_t len)
af84459fbf938e508fd10b01cb8d699c79083813takashi char c = 0;
af84459fbf938e508fd10b01cb8d699c79083813takashi /* parse the text for URLs */
af84459fbf938e508fd10b01cb8d699c79083813takashi while (!ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0)) {
af84459fbf938e508fd10b01cb8d699c79083813takashi subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r,
af84459fbf938e508fd10b01cb8d699c79083813takashi if ((m->flags & M_ATEND) && (match < (len - s_from)))
af84459fbf938e508fd10b01cb8d699c79083813takashi VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r,
af84459fbf938e508fd10b01cb8d699c79083813takashi "C: matched %s, substituting %s",
af84459fbf938e508fd10b01cb8d699c79083813takashistatic void pcdata(void* ctxt, const xmlChar *uchars, int length)
af84459fbf938e508fd10b01cb8d699c79083813takashi /* not sure if this should force-flush
af84459fbf938e508fd10b01cb8d699c79083813takashi * (i.e. can one cdata section come in multiple calls?)
af84459fbf938e508fd10b01cb8d699c79083813takashistatic void pcomment(void* ctxt, const xmlChar *uchars)
af84459fbf938e508fd10b01cb8d699c79083813takashistatic void pendElement(void* ctxt, const xmlChar* uname)
af84459fbf938e508fd10b01cb8d699c79083813takashi if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
af84459fbf938e508fd10b01cb8d699c79083813takashi /* enforce html */
af84459fbf938e508fd10b01cb8d699c79083813takashi /* enforce html legacy */
af84459fbf938e508fd10b01cb8d699c79083813takashi /* TODO - implement HTML "allowed here" using the stack */
af84459fbf938e508fd10b01cb8d699c79083813takashi /* nah. Keeping the stack is too much overhead */
af84459fbf938e508fd10b01cb8d699c79083813takashi ctx->offset = 0; /* having dumped it, we can re-use the memory */
af84459fbf938e508fd10b01cb8d699c79083813takashistatic void pstartElement(void* ctxt, const xmlChar* uname,
af84459fbf938e508fd10b01cb8d699c79083813takashi const char** a;
af84459fbf938e508fd10b01cb8d699c79083813takashi const void** descp;
af84459fbf938e508fd10b01cb8d699c79083813takashi if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
af84459fbf938e508fd10b01cb8d699c79083813takashi /* enforce html */
af84459fbf938e508fd10b01cb8d699c79083813takashi /* enforce html legacy */
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
af84459fbf938e508fd10b01cb8d699c79083813takashi /* TODO - implement HTML "allowed here" */
a610901168de82df5fc5d99b8759fd80e0f70aeasf if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL))
af84459fbf938e508fd10b01cb8d699c79083813takashi linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING);
af84459fbf938e508fd10b01cb8d699c79083813takashi switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) {
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
af84459fbf938e508fd10b01cb8d699c79083813takashi "Bogus HTML attribute %s of %s dropped",
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
af84459fbf938e508fd10b01cb8d699c79083813takashi "Deprecated HTML attribute %s of %s dropped",
af84459fbf938e508fd10b01cb8d699c79083813takashi required_attrs--; /* cross off the number still needed */
af84459fbf938e508fd10b01cb8d699c79083813takashi /* fallthrough - required implies valid */
af84459fbf938e508fd10b01cb8d699c79083813takashi if (a[1]) {
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* f;
af84459fbf938e508fd10b01cb8d699c79083813takashi "H/RX: match at %s, substituting %s",
af84459fbf938e508fd10b01cb8d699c79083813takashi "H: matched %s, substituting %s",
af84459fbf938e508fd10b01cb8d699c79083813takashi else { /* it fits in the existing space */
af84459fbf938e508fd10b01cb8d699c79083813takashi /* URIs only want one match unless overridden in the config */
af84459fbf938e508fd10b01cb8d699c79083813takashi num_match = 0; /* reset here since we're working per-rule */
af84459fbf938e508fd10b01cb8d699c79083813takashi subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* f;
af84459fbf938e508fd10b01cb8d699c79083813takashi "E/RX: match at %s, substituting %s",
af84459fbf938e508fd10b01cb8d699c79083813takashi "E: matched %s, substituting %s",
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL);
af84459fbf938e508fd10b01cb8d699c79083813takashi /* write the attribute, using pcharacters to html-escape
af84459fbf938e508fd10b01cb8d699c79083813takashi anything that needs it in the value.
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL);
af84459fbf938e508fd10b01cb8d699c79083813takashi pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf));
af84459fbf938e508fd10b01cb8d699c79083813takashi /* if there are more required attributes than we found then complain */
af84459fbf938e508fd10b01cb8d699c79083813takashi ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
af84459fbf938e508fd10b01cb8d699c79083813takashi "HTML element %s is missing %d required attributes",
af84459fbf938e508fd10b01cb8d699c79083813takashistatic meta* metafix(request_rec* r, const char* buf)
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* p;
af84459fbf938e508fd10b01cb8d699c79083813takashi const char* q;
af84459fbf938e508fd10b01cb8d699c79083813takashi while (!ap_regexec(seek_meta, buf+offs, 2, pmatch, 0)) {
af84459fbf938e508fd10b01cb8d699c79083813takashi while (!isalpha(*++p));
af84459fbf938e508fd10b01cb8d699c79083813takashi /* find content=... string */
af84459fbf938e508fd10b01cb8d699c79083813takashi p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so,
af84459fbf938e508fd10b01cb8d699c79083813takashi /* if it doesn't contain "content", ignore, don't crash! */
5effc8b39fae5cd169d17f342bfc265705840014rbowen if (p != NULL) {
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen while (*p) {
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen while (*p && isspace(*p))
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen if (*p != '=')
while (*p && isspace(*++p));
delim = *p++;
for (q = p; *q != delim; ++q);
#ifndef GO_FASTER
return ret;
const char* start;
const char* end;
const char* delim;
const char* before;
const char* after;
const char* replacement;
const char* var;
if (delim) {
if (!replacement) {
if (delim)
return str;
urlmap* p;
const char *err;
if (err) {
if (ok == 0) {
if (prev)
if (!f->ctx) {
const char* force;
if (!force) {
if (!f->r->proxyreq) {
else if (!f->r->content_type) {
if (errmsg) {
#ifndef GO_FASTER
return NULL;
fctx->f = f;
return f->ctx;
apr_bucket* b;
const char* buf = 0;
#ifndef USE_OLD_LIBXML2
if (!ctxt)
b = APR_BUCKET_NEXT(b)) {
if (APR_BUCKET_IS_METADATA(b)) {
if (APR_BUCKET_IS_EOS(b)) {
else if (APR_BUCKET_IS_FLUSH(b)) {
== APR_SUCCESS) {
const char* cenc;
if (!xml2enc_charset ||
if (!xml2enc_charset)
ap_set_content_type(f->r,
return rv;
(int(*)(void*))htmlFreeParserCtxt,
#ifndef USE_OLD_LIBXML2
return APR_SUCCESS;
return ret;
urlmap* a;
return conf;
return err;
const char* usage =
const char* from;
const char* to;
const char* flags;
return usage;
return usage;
return NULL;
return NULL;
return NULL;
if (!attrs) {
return NULL;
{ NULL }
if (!xml2enc_charset) {
return OK;
if (xml2enc_filter)
NULL,
NULL,