mod_proxy_html.c revision ef82e8fa164e0a1f8b813f7deb6b7ead96018c94
/* Copyright (c) 2003-11, WebThing Ltd
* Copyright (c) 2011-, The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* GO_FASTER
 * Compile-time switch for trace logging.  You can #define GO_FASTER to
 * disable trace logging: in that case VERBOSE(x) and VERBOSEB(x) expand
 * to nothing, so any code wrapped in them is dropped by the preprocessor.
 *
 * NOTE(review): no definitions appear in the #else branch as shown here,
 * yet VERBOSE() is used further down in this file.  Confirm that the
 * tracing variants of VERBOSE/VERBOSEB are defined for the non-GO_FASTER
 * build.
 */
#ifdef GO_FASTER
#define VERBOSE(x)
#define VERBOSEB(x)
#else
#endif
/* libxml2 */
#include <libxml/HTMLparser.h>
#include "http_protocol.h"
#include "http_config.h"
#include "http_log.h"
#include "apr_strings.h"
#include "apr_hash.h"
#include "apr_strmatch.h"
#include "apr_lib.h"
#include "apr_optional.h"
#include "mod_xml2enc.h"
#include "http_request.h"
#include "ap_expr.h"
/* globals set once at startup */
/* old_expr: regex-substitution handle.  Presumably the pattern used to
 * convert old-style ENV conditions on URL maps into ap_expr syntax; a
 * comment later in this file says it only needs to last for the config
 * phase -- TODO confirm where it is compiled.
 */
static ap_rxplus_t *old_expr;
/* seek_meta: compiled regular expression.  Presumably locates META
 * elements in the document head -- TODO confirm against its compile site.
 */
static ap_regex_t *seek_meta;
/* seek_content: precompiled apr_strmatch pattern.  Presumably matches the
 * "content" attribute name inside a META element -- TODO confirm.
 */
static const apr_strmatch_pattern* seek_content;
/* Bit flags for URL-map rules.  Every value is a distinct bit, so several
 * flags can be OR-ed into one unsigned int (presumably urlmap.flags).
 *
 * The per-flag notes below are inferred from the names and from fragments
 * elsewhere in this file; the flag parser is not visible here -- TODO
 * confirm each one against the ProxyHTMLURLMap flag handling.
 */
/* presumably: apply the rule to HTML link attributes */
#define M_HTML 0x01
/* presumably: apply the rule inside scripting-event attributes */
#define M_EVENTS 0x02
/* presumably: apply the rule inside CDATA sections (script/style text) */
#define M_CDATA 0x04
/* presumably: the from-pattern is a regular expression (urlmap.from.r) */
#define M_REGEX 0x08
/* presumably: match only at the start of the string */
#define M_ATSTART 0x10
/* presumably: match only at the end of the string */
#define M_ATEND 0x20
/* presumably: stop processing further rules after this one matches */
#define M_LAST 0x40
/* presumably: keep processing further rules after this one matches */
#define M_NOTLAST 0x80
/* presumably: interpolate variables in the to-pattern */
#define M_INTERPOLATE_TO 0x100
/* presumably: interpolate variables in the from-pattern */
#define M_INTERPOLATE_FROM 0x200
/* Wrapper around a single string value.
 * NOTE(review): no use of this type is visible in this view of the file;
 * presumably it names an attribute in a configured list -- confirm.
 */
typedef struct {
/* the string itself; never modified through this pointer */
const char *val;
} tattr;
/* A pair of unsigned positions.
 * Presumably the start and end offsets of a META element within the
 * buffer being scanned -- TODO confirm against the code that fills it in.
 */
typedef struct {
unsigned int start;
unsigned int end;
} meta;
/* One URL rewriting rule: something to look for ("from") and the text to
 * substitute for it ("to").
 */
typedef struct urlmap {
/* rule options; presumably an OR of the M_* bits -- TODO confirm */
unsigned int flags;
/* presumably options for the regular expression in from.r -- TODO confirm */
unsigned int regflags;
/* The pattern to match.  Only one member is meaningful for a given rule;
 * presumably the M_REGEX bit in flags selects between them -- TODO confirm.
 */
union {
/* pattern as a plain string */
const char *c;
/* pattern as a compiled regular expression */
ap_regex_t *r;
} from;
/* replacement text */
const char *to;
} urlmap;
typedef struct {
const char *doctype;
const char *etag;
unsigned int flags;
const char *charset_out;
int extfix;
int metafix;
int strip_comments;
int interp;
int enabled;
/* Per-filter state.  Presumably the context handed to the SAX callbacks
 * (inferred from the type name) -- TODO confirm.
 */
typedef struct {
/* the output filter this context belongs to */
ap_filter_t *f;
/* writable character buffer; NOTE(review): its ownership and sizing are
 * not visible in this view
 */
char *buf;
/* name of a character encoding; presumably that of the input document
 * -- TODO confirm
 */
const char *encoding;
} saxctxt;
/* Option bits, each a distinct bit so they can be OR-ed together.
 * Presumably they control the string-normalising code that follows the
 * constants below -- TODO confirm against its callers.
 */
/* presumably: convert the string to lower case */
#define NORM_LC 0x1
/* code further down tests this bit before writing a '/' into the string;
 * presumably it turns backslashes into forward slashes -- TODO confirm
 */
#define NORM_MSSLASH 0x2
/* NOTE(review): no use of this bit is visible in this view */
#define NORM_RESET 0x4
/* libxml2 SAX handler table for the HTML parser.
 * NOTE(review): the code that fills in its callbacks is not visible here.
 */
static htmlSAXHandler sax;
/* DOCTYPE declarations, each terminated by a newline.  Presumably chosen by
 * the doctype directive handler, which compares its arguments against
 * "xhtml", "html", "html5" and "legacy" -- TODO confirm the mapping.
 */
/* HTML 4.01 (strict) */
static const char *const fpi_html =
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n";
/* HTML 4.01 Transitional */
static const char *const fpi_html_legacy =
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
/* XHTML 1.0 Strict */
static const char *const fpi_xhtml =
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n";
/* XHTML 1.0 Transitional */
static const char *const fpi_xhtml_legacy =
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
/* HTML5 */
static const char *const fpi_html5 = "<!DOCTYPE html>\n";
/* Tag-closing sequences: ">" in HTML style, " />" in XHTML style.
 * Presumably used when closing empty elements -- TODO confirm.
 */
static const char *const html_etag = ">";
static const char *const xhtml_etag = " />";
/* Defaults: an empty doctype string and the HTML-style tag ending.  The
 * commented-out line below is an alternative default (HTML 4.01 strict)
 * that is not in effect.
 */
/*#define DEFAULT_DOCTYPE fpi_html */
static const char *const DEFAULT_DOCTYPE = "";
#define DEFAULT_ETAG html_etag
{
char *p;
for (p = str; *p; ++p)
if (isupper(*p))
*p = tolower(*p);
if (flags & NORM_MSSLASH)
*p = '/';
}
/* This is always utf-8 on entry. We can convert charset within FLUSH */
{
int i;
int begin;
switch (chars[i]) {
default : break;
}
}
}
{
char *newbuf;
return;
(int(*)(void*))free);
(int(*)(void*))free, apr_pool_cleanup_null);
}
}
{
}
{
urlmap *m;
char *found;
char c = 0;
int nmatch;
char *subs;
#ifndef GO_FASTER
#endif
/* parse the text for URLs */
continue;
nmatch = 10;
offs = 0;
)
}
else {
}
}
}
else {
break;
continue;
"C: matched %s, substituting %s",
}
else {
}
}
}
}
}
{
}
else {
/* not sure if this should force-flush
* (i.e. can one cdata section come in multiple calls?)
*/
}
}
{
return;
}
else {
}
}
{
/* enforce html */
return;
}
/* enforce html legacy */
if (!desc)
return;
}
/* TODO - implement HTML "allowed here" using the stack */
/* nah. Keeping the stack is too much overhead */
}
}
}
{
int required_attrs;
int num_match;
char *subs;
const char** a;
urlmap *m;
char *found;
#ifndef GO_FASTER
#endif
int i;
#ifdef HAVE_STACK
const void** descp;
#endif
int enforce = 0;
/* enforce html */
enforce = 2;
return;
}
enforce = 1;
/* enforce html legacy */
if (!desc) {
return;
}
}
"Bogus HTML element %s dropped", name);
return;
}
"Deprecated HTML element %s dropped", name);
return;
}
#ifdef HAVE_STACK
/* TODO - implement HTML "allowed here" */
#endif
required_attrs = 0;
if (attrs) {
for (a = attrs; *a; a += 2) {
case HTML_INVALID:
"Bogus HTML attribute %s of %s dropped",
*a, name);
continue;
case HTML_DEPRECATED:
"Deprecated HTML attribute %s of %s dropped",
*a, name);
continue;
case HTML_REQUIRED:
required_attrs--; /* cross off the number still needed */
/* fallthrough - required implies valid */
default:
break;
}
}
if (a[1]) {
if (linkattrs) {
break;
}
}
}
is_uri = ATTR_EVENT;
break;
}
}
}
switch (is_uri) {
case ATTR_URI:
num_match = 0;
continue;
nmatch = 10;
pmatch, 0)) {
++num_match;
VERBOSE({
const char *f;
ctx->f->r,
f, subs);
})
}
else {
}
}
} else {
++num_match;
0, ctx->f->r,
"H: matched %s, substituting %s",
}
else { /* it fits in the existing space */
}
break;
}
}
/* URIs only want one match unless overridden in the config */
break;
}
break;
case ATTR_EVENT:
num_match = 0; /* reset here since we're working per-rule */
continue;
nmatch = 10;
offs = 0;
VERBOSE({
const char *f;
ctx->f->r,
f, subs);
})
}
else {
}
++num_match;
}
}
else {
continue;
while (found) {
m->from.c);
continue;
}
else {
m->from.c);
}
0, ctx->f->r,
"E: matched %s, substituting %s",
}
else {
}
++num_match;
}
}
break;
}
break;
case ATTR_IGNORE:
break;
}
}
if (!a[1])
else {
/* write the attribute, using pcharacters to html-escape
anything that needs it in the value.
*/
}
}
}
else
if ((enforce > 0) && (required_attrs > 0)) {
/* if there are more required attributes than we found then complain */
"HTML element %s is missing %d required attributes",
}
}
{
const char *p;
const char *q;
char *header;
char *content;
char delim;
while (!apr_isalpha(*++p));
for (q = p; apr_isalnum(*q) || (*q == '-'); ++q);
/* find content=... string */
/* if it doesn't contain "content", ignore, don't crash! */
if (p != NULL) {
while (*p) {
p += 7;
while (*p && apr_isspace(*p))
++p;
if (*p != '=')
continue;
while (*p && apr_isspace(*++p));
if ((*p == '\'') || (*p == '"')) {
delim = *p++;
for (q = p; *q != delim; ++q);
} else {
for (q = p; *q && !apr_isspace(*q) && (*q != '>'); ++q);
}
break;
}
}
}
}
#ifndef GO_FASTER
"Adding header [%s: %s] from HTML META",
#endif
}
}
return ret;
}
{
const char *start;
const char *end;
const char *delim;
const char *before;
const char *after;
const char *replacement;
const char *var;
for (;;) {
break;
break;
if (delim) {
}
else {
}
if (!replacement) {
if (delim)
else
replacement = "";
}
}
return str;
}
{
urlmap *p;
request_rec *r = ctx->f->r;
const char *err;
if (err) {
"Error evaluating expr: %s", err);
}
if (ok == 0) {
continue; /* condition is unsatisfied */
}
}
continue; /* don't use empty from-pattern */
}
}
}
/* evaluate p->cond; continue if unsatisfied */
/* create new urlmap with memcpy and append to map */
/* interpolate from if flagged to do so */
/* interpolate to if flagged to do so */
else
}
if (prev)
}
{
if (!f->ctx) {
const char *force;
if (!force) {
if (!f->r->proxyreq) {
errmsg = "Non-proxy request; not inserting proxy-html filter";
}
else if (!f->r->content_type) {
errmsg = "No content-type; bailing out of proxy-html filter";
}
strncasecmp(f->r->content_type,
"application/xhtml+xml", 21)) {
errmsg = "Non-HTML content; not inserting proxy-html filter";
}
}
errmsg = "No links configured: nothing for proxy-html filter to do";
}
if (errmsg) {
#ifndef GO_FASTER
#endif
return NULL;
}
fctx->f = f;
f->r->connection->bucket_alloc);
else
/* defer dealing with charset_out until after sniffing charset_in
* so we can support setting one to t'other.
*/
}
return f->ctx;
}
{
apr_bucket* b;
const char *buf = 0;
apr_size_t bytes = 0;
#ifndef USE_OLD_LIBXML2
#endif
if (!ctxt)
for (b = APR_BRIGADE_FIRST(bb);
b != APR_BRIGADE_SENTINEL(bb);
b = APR_BUCKET_NEXT(b)) {
if (APR_BUCKET_IS_METADATA(b)) {
if (APR_BUCKET_IS_EOS(b)) {
}
}
else if (APR_BUCKET_IS_FLUSH(b)) {
/* pass on flush, except at start where it would cause
* headers to be sent before doc sniffing
*/
}
}
}
== APR_SUCCESS) {
const char *cenc;
if (!xml2enc_charset ||
if (!xml2enc_charset)
"No i18n support found. Install mod_xml2enc if required");
ap_set_content_type(f->r, "text/html;charset=utf-8");
}
else {
/* if we wanted a non-default charset_out, insert the
* xml2enc filter now that we've sniffed it
*/
ap_set_content_type(f->r,
apr_pstrcat(f->r->pool,
}
else /* Normal case, everything worked, utf-8 output */
ap_set_content_type(f->r, "text/html;charset=utf-8");
}
4, 0, enc);
buf += 4;
bytes -= 4;
return rv;
}
(int(*)(void*))htmlFreeParserCtxt,
#ifndef USE_OLD_LIBXML2
"Unsupported parser opts %x", xmlopts);
#endif
if (m) {
}
else {
}
}
else {
}
}
else {
"Error in bucket read");
}
}
/*ap_fflush(ctxt->f->next, ctxt->bb); // uncomment for debug */
return APR_SUCCESS;
}
{
/* ret->interp = 1; */
return ret;
}
{
/* don't merge declarations - just use the most specific */
urlmap *a;
}
}
}
else
}
else {
}
return conf;
}
{
}
else {
}
/* back-compatibility: support old-style ENV expressions
* by converting to ap_expr syntax.
*
* 1. var --> env(var)
* 2. var=val --> env(var)=val
* 3. !var --> !env(var)
* 4. !var=val --> env(var)!=val
*/
/* we got a substitution. Check for the case (3) above
* that the regexp gets wrong: a negation without a comparison.
*/
newcond[0] = '!';
}
}
}
else {
}
return err;
}
{
const char *usage =
"Usage: ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]";
const char *from;
const char *to;
const char *flags;
return usage;
return usage;
/* the args look OK, so let's use them */
}
else
}
const char *t, const char *l)
{
if (!strcasecmp(t, "xhtml")) {
if (l && !strcasecmp(l, "legacy"))
else
}
else if (!strcasecmp(t, "html")) {
if (l && !strcasecmp(l, "legacy"))
else
}
else if (!strcasecmp(t, "html5")) {
}
else {
if (l && ((l[0] == 'x') || (l[0] == 'X')))
else
}
return NULL;
}
{
}
return NULL;
}
{
return NULL;
}
{
if (!attrs) {
}
return NULL;
}
static const command_rec proxy_html_cmds[] = {
"Strings to be treated as scripting events"),
"Support interpolation and conditions in URLMaps"),
"Enable proxy-html and xml2enc filters"),
{ NULL }
};
{
if (!xml2enc_charset) {
"I18n support in mod_proxy_html requires mod_xml2enc. "
"Without it, non-ASCII characters in proxied pages are "
"likely to display incorrectly.");
}
/* old_expr only needs to last the life of the config phase */
return OK;
}
static void proxy_html_insert(request_rec *r)
{
if (xml2enc_filter)
}
}
/* Presumably the module's hook-registration function (inferred from the
 * name and the apr_pool_t parameter).
 * NOTE(review): in this view the body contains only the comment below;
 * the registration calls themselves are not shown.
 */
static void proxy_html_hooks(apr_pool_t *p)
{
/* move this to pre_config so old_expr is available to interpret
* old-style conditions on URL maps.
*/
}
NULL,
NULL,
};