mod_proxy_html.c revision 63a5c72a2b2f46b899c32eee216f9d5dd4072acb
97a9a944b5887e91042b019776c41d5dd74557aferikabele/* Copyright (c) 2003-11, WebThing Ltd
97a9a944b5887e91042b019776c41d5dd74557aferikabele * Copyright (c) 2011-, The Apache Software Foundation
a945f35eff8b6a88009ce73de6d4c862ce58de3cslive * Licensed to the Apache Software Foundation (ASF) under one or more
a945f35eff8b6a88009ce73de6d4c862ce58de3cslive * contributor license agreements. See the NOTICE file distributed with
a945f35eff8b6a88009ce73de6d4c862ce58de3cslive * this work for additional information regarding copyright ownership.
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * The ASF licenses this file to You under the Apache License, Version 2.0
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * (the "License"); you may not use this file except in compliance with
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * Unless required by applicable law or agreed to in writing, software
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * distributed under the License is distributed on an "AS IS" BASIS,
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3f08db06526d6901aa08c110b5bc7dde6bc39905nd * See the License for the specific language governing permissions and
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * limitations under the License.
 */
3f08db06526d6901aa08c110b5bc7dde6bc39905nd/* GO_FASTER
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd You can #define GO_FASTER to disable trace logging.
 */
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd/* libxml2 */
0dcb04cfa4202dd443d362f20db0de81faa2eb17nd/* globals set once at startup */
/*
 * Both pointers start out NULL, and other code in this file tests them
 * for NULL before relying on them, so either one may legitimately stay
 * unset.
 * NOTE(review): presumably these are resolved from a separate charset
 * module during startup -- confirm where they are assigned.
 */
/* Takes a request, an xmlCharEncoding out-parameter and a string
 * out-parameter; returns an apr_status_t. */
0dcb04cfa4202dd443d362f20db0de81faa2eb17ndstatic apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL;
/* Takes a request, a string and an unsigned int; returns an apr_status_t.
 * NOTE(review): meaning of the string and integer arguments is not visible
 * here -- confirm against the provider's header. */
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndstatic apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL;
9a58dc6a2b26ec128b1270cf48810e705f1a90dbsftypedef struct {
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd const char *val;
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndtypedef struct {
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd unsigned int start;
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd unsigned int end;
06ba4a61654b3763ad65f52283832ebf058fdf1cslivetypedef struct urlmap {
06ba4a61654b3763ad65f52283832ebf058fdf1cslive unsigned int flags;
115d4988233465895664ae2e19ba6c281281a00bnd unsigned int regflags;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive const char *c;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive const char *to;
06ba4a61654b3763ad65f52283832ebf058fdf1cslivetypedef struct {
06ba4a61654b3763ad65f52283832ebf058fdf1cslive const char *doctype;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive const char *etag;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive unsigned int flags;
9a58dc6a2b26ec128b1270cf48810e705f1a90dbsf const char *charset_out;
06ba4a61654b3763ad65f52283832ebf058fdf1cslivetypedef struct {
06ba4a61654b3763ad65f52283832ebf058fdf1cslive const char *encoding;
/*
 * Rewrite category. A switch elsewhere in this file has one case for each
 * of the three enumerators.
 * Values follow the default C numbering: ATTR_IGNORE == 0, ATTR_URI == 1,
 * ATTR_EVENT == 2.
 * NOTE(review): presumably ATTR_URI marks attributes holding a URL and
 * ATTR_EVENT marks scripting-event attributes -- confirm at the use site.
 */
06ba4a61654b3763ad65f52283832ebf058fdf1cslivetypedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t;
/*
 * Ready-made DOCTYPE declarations. Each string is a complete declaration
 * terminated by a newline.
 * NOTE(review): "fpi" presumably stands for "formal public identifier";
 * note that fpi_html5 carries no public identifier at all.
 */
/* HTML 4.01, public identifier without a "Transitional" qualifier. */
06ba4a61654b3763ad65f52283832ebf058fdf1cslivestatic const char *const fpi_html =
06ba4a61654b3763ad65f52283832ebf058fdf1cslive "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n";
/* HTML 4.01 Transitional. */
06ba4a61654b3763ad65f52283832ebf058fdf1cslivestatic const char *const fpi_html_legacy =
06ba4a61654b3763ad65f52283832ebf058fdf1cslive "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
/* XHTML 1.0 Strict, including the system identifier (DTD URL). */
06ba4a61654b3763ad65f52283832ebf058fdf1cslivestatic const char *const fpi_xhtml =
06ba4a61654b3763ad65f52283832ebf058fdf1cslive "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n";
/* XHTML 1.0 Transitional, including the system identifier (DTD URL). */
06ba4a61654b3763ad65f52283832ebf058fdf1cslivestatic const char *const fpi_xhtml_legacy =
06ba4a61654b3763ad65f52283832ebf058fdf1cslive "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
/* Short DOCTYPE with neither public nor system identifier. */
06ba4a61654b3763ad65f52283832ebf058fdf1cslivestatic const char *const fpi_html5 = "<!DOCTYPE html>\n";
06ba4a61654b3763ad65f52283832ebf058fdf1cslive/*#define DEFAULT_DOCTYPE fpi_html */
06ba4a61654b3763ad65f52283832ebf058fdf1cslive for (p = str; *p; ++p)
115d4988233465895664ae2e19ba6c281281a00bnd for (p = ap_strchr(str, '\\'); p; p = ap_strchr(p+1, '\\'))
06ba4a61654b3763ad65f52283832ebf058fdf1cslive/* This is always UTF-8 on entry. We can convert charset within FLUSH */
/*
 * FLUSH writes the pending run chars[begin .. i-1] with AP_fwrite and then
 * sets begin to i+1, i.e. it steps over the character at index i.
 * It relies on `ctx`, `chars`, `begin` and `i` being in scope where it is
 * expanded.
 * NOTE(review): the expansion is two statements with no enclosing braces
 * or do { } while (0), so it must not be used as the sole body of an
 * unbraced if/else or loop -- only the first statement would be governed.
 */
06ba4a61654b3763ad65f52283832ebf058fdf1cslive#define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0); begin = i+1
06ba4a61654b3763ad65f52283832ebf058fdf1cslivestatic void pcharacters(void *ctxt, const xmlChar *uchars, int length)
06ba4a61654b3763ad65f52283832ebf058fdf1cslive switch (chars[i]) {
/* Escape markup-significant characters. FLUSH first writes the text that
 * precedes chars[i] and steps past that character; the entity reference
 * written next takes its place in the output. The replacement strings must
 * be the entity references themselves: emitting the raw character would
 * make the escaping a no-op, and a bare '"' cannot appear unescaped inside
 * a C string literal. */
115d4988233465895664ae2e19ba6c281281a00bnd case '&' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&amp;"); break;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive case '<' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&lt;"); break;
115d4988233465895664ae2e19ba6c281281a00bnd case '>' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&gt;"); break;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive case '"' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&quot;"); break;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive default : break;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive (int(*)(void*))free);
115d4988233465895664ae2e19ba6c281281a00bndstatic void pappend(saxctxt *ctx, const char *buf, const size_t len)
06ba4a61654b3763ad65f52283832ebf058fdf1cslive char c = 0;
06ba4a61654b3763ad65f52283832ebf058fdf1cslive /* parse the text for URLs */
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd while (!ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0)) {
115d4988233465895664ae2e19ba6c281281a00bnd VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r,
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd "C: matched %s, substituting %s",
06ba4a61654b3763ad65f52283832ebf058fdf1cslivestatic void pcdata(void *ctxt, const xmlChar *uchars, int length)
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd /* not sure if this should force-flush
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd * (i.e. can one cdata section come in multiple calls?)
if (!desc)
int required_attrs;
int num_match;
char *subs;
urlmap *m;
char *found;
#ifndef GO_FASTER
#ifdef HAVE_STACK
const void** descp;
int enforce = 0;
if (!desc) {
#ifdef HAVE_STACK
required_attrs = 0;
if (attrs) {
case HTML_INVALID:
*a, name);
case HTML_DEPRECATED:
*a, name);
case HTML_REQUIRED:
if (linkattrs) {
switch (is_uri) {
case ATTR_URI:
num_match = 0;
pmatch, 0)) {
++num_match;
VERBOSE({
ctx->f->r,
f, subs);
++num_match;
0, ctx->f->r,
case ATTR_EVENT:
offs = 0;
VERBOSE({
ctx->f->r,
f, subs);
++num_match;
while (found) {
m->from.c);
m->from.c);
0, ctx->f->r,
++num_match;
case ATTR_IGNORE:
char *header;
char *content;
char delim;
while (!apr_isalpha(*++p));
if (p != NULL) {
while (apr_isspace(*p))
while (*p && apr_isspace(*++p));
delim = *p++;
for (q = p; *q != delim; ++q);
#ifndef GO_FASTER
return ret;
const char *start;
const char *end;
const char *delim;
const char *before;
const char *after;
const char *replacement;
const char *var;
if (delim) {
if (!replacement) {
if (delim)
return str;
urlmap *p;
const char *err;
if (err) {
if (ok == 0) {
if (prev)
if (!f->ctx) {
const char *force;
if (!force) {
if (!f->r->proxyreq) {
else if (!f->r->content_type) {
if (errmsg) {
#ifndef GO_FASTER
return NULL;
fctx->f = f;
return f->ctx;
apr_bucket* b;
const char *buf = 0;
#ifndef USE_OLD_LIBXML2
if (!ctxt)
b = APR_BUCKET_NEXT(b)) {
if (APR_BUCKET_IS_METADATA(b)) {
if (APR_BUCKET_IS_EOS(b)) {
else if (APR_BUCKET_IS_FLUSH(b)) {
== APR_SUCCESS) {
const char *cenc;
if (!xml2enc_charset ||
if (!xml2enc_charset)
ap_set_content_type(f->r,
return rv;
(int(*)(void*))htmlFreeParserCtxt,
#ifndef USE_OLD_LIBXML2
return APR_SUCCESS;
return ret;
urlmap *a;
return conf;
return err;
const char *usage =
const char *from;
const char *to;
const char *flags;
return usage;
return usage;
return NULL;
return NULL;
return NULL;
if (!attrs) {
return NULL;
{ NULL }
if (!xml2enc_charset) {
return OK;
if (xml2enc_filter)
NULL,
NULL,