mod_deflate.c revision 58015652ffe00f004c6404a0631474f23dadc7da
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* mod_deflate.c: Perform deflate content-encoding on the fly
*
* Written by Ian Holsman, Justin Erenkrantz, and Nick Kew
*/
/*
* Portions of this software are based upon zlib code by Jean-loup Gailly
* (zlib functions gz_open and gzwrite, check_header)
*/
/* zlib flags */
#include "httpd.h"
#include "http_config.h"
#include "http_log.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_general.h"
#include "util_filter.h"
#include "apr_buckets.h"
#include "http_request.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"
#include "zlib.h"
static const char deflateFilterName[] = "DEFLATE";
typedef struct deflate_filter_config_t
{
int windowSize;
int memlevel;
int compressionlevel;
char *note_ratio_name;
char *note_input_name;
char *note_output_name;
/* RFC 1952 Section 2.3 defines the gzip header:
*
* +---+---+---+---+---+---+---+---+---+---+
* |ID1|ID2|CM |FLG| MTIME |XFL|OS |
* +---+---+---+---+---+---+---+---+---+---+
*/
static const char gzip_header[10] =
0, 0, 0, 0, /* mtime */
0, 0x03 /* Unix OS_CODE */
};
/* magic header */
/* Defaults copied into a freshly created deflate_filter_config. */
/* windowsize is negative to suppress Zlib header; the DeflateWindowSize
* handler stores its 1-15 argument negated in the same way, so -15
* corresponds to the largest configurable window.
*/
#define DEFAULT_WINDOWSIZE -15
/* 9 is the upper bound accepted by the DeflateMemLevel handler (1-9). */
#define DEFAULT_MEMLEVEL 9
/* Default for bufferSize; the DeflateBufferSize handler overrides it
* with any positive value.
*/
#define DEFAULT_BUFFERSIZE 8096
/* Check whether a request is gzipped, so we can un-gzip it.
* If a request has multiple encodings, we need the gzip
* to be the outermost non-identity encoding.
*/
{
int found = 0;
/* the output filter has two tables and a content_encoding to check */
if (!encoding) {
encoding = r->content_encoding;
}
}
found = 1;
if (hdrs) {
}
else {
r->content_encoding = NULL;
}
}
/* If the outermost encoding isn't gzip, there's nothing
* we can do. So only check the last non-identity token.
*/
char *ptr;
for(;;) {
if (!token) { /* gzip:identity or other:identity */
found = 1;
if (hdrs) {
}
else {
r->content_encoding = NULL;
}
}
break; /* seen all tokens */
}
*token = '\0';
if (hdrs) {
}
else {
r->content_encoding = new_encoding;
}
found = 1;
}
*token = '\0';
continue; /* strip the token and find the next one */
}
break; /* found a non-identity token */
}
}
}
/*
* If we have dealt with the headers above but content_encoding was set
* before, sync it with the new value in the hdrs table, as
* r->content_encoding takes precedence later on in the http_header_filter
* and hence would destroy what we have just set in the hdrs table.
*/
if (hdrs && r->content_encoding) {
}
return found;
}
/* Outputs a long in LSB order to the given buffer;
* only the bottom 4 bytes are required for the deflate file format.
*/
{
string[0] = (unsigned char)(x & 0xff);
}
/* Inputs a string and returns a long.
*/
{
return ((unsigned long)string[0])
}
{
deflate_filter_config *c = apr_pcalloc(p, sizeof *c);
c->memlevel = DEFAULT_MEMLEVEL;
c->windowSize = DEFAULT_WINDOWSIZE;
c->bufferSize = DEFAULT_BUFFERSIZE;
return c;
}
const char *arg)
{
int i;
if (i < 1 || i > 15)
return "DeflateWindowSize must be between 1 and 15";
c->windowSize = i * -1;
return NULL;
}
const char *arg)
{
if (n <= 0) {
return "DeflateBufferSize should be positive";
}
c->bufferSize = (apr_size_t)n;
return NULL;
}
{
}
}
}
}
else {
}
return NULL;
}
const char *arg)
{
int i;
if (i < 1 || i > 9)
return "DeflateMemLevel must be between 1 and 9";
c->memlevel = i;
return NULL;
}
const char *arg)
{
int i;
if (i < 1 || i > 9)
return "Compression Level must be between 1 and 9";
c->compressionlevel = i;
return NULL;
}
/*
* Working state of one filter instance. The filters test "if (!ctx)" to
* detect their first invocation, so this state persists across calls.
*
* NOTE(review): code elsewhere in this file refers to ctx->stream and
* ctx->validation_buffer_length, neither of which is declared in the
* struct as seen here -- confirm the member list is complete.
*/
typedef struct deflate_ctx_t
{
/* presumably the zlib output buffer set up at the "initialize
* deflate/inflate output buffer" steps -- TODO confirm
*/
unsigned char *buffer;
/* running checksum; per the flush helper's comment it is computed on
* the output when inflating and on the input when deflating
*/
unsigned long crc;
/* zlib teardown routine taking the z_streamp; presumably deflateEnd or
* inflateEnd, invoked by the registered cleanup -- TODO confirm
*/
int (*libz_end_func)(z_streamp);
/* receives the validation bytes (CRC and length) that follow the
* compressed data; the inflating filter tests it for non-NULL before
* saving bytes into it
*/
unsigned char *validation_buffer;
/* zero until the first data bucket is processed; the gzip header is
* parsed only while it is still zero (tested with a post-increment)
*/
int inflate_init;
} deflate_ctx;
/* Number of validation bytes (CRC and length) after the compressed data.
* RFC 1952 defines this trailer as a 4-byte CRC32 followed by a 4-byte
* ISIZE.
*/
#define VALIDATION_SIZE 8
/* The two constants below are values for the "int crc" argument of the
* flush helper, which tests it with "if (crc)".
*/
/* Do not update ctx->crc, see comment in flush_libz_buffer */
#define NO_UPDATE_CRC 0
/* Do update ctx->crc, see comment in flush_libz_buffer */
#define UPDATE_CRC 1
struct apr_bucket_alloc_t *bucket_alloc,
int crc)
{
int done = 0;
unsigned int deflate_len;
apr_bucket *b;
for (;;) {
if (deflate_len != 0) {
/*
* Do we need to update ctx->crc? Usually this is the case for
* inflate action where we need to do a crc on the output, whereas
* in the deflate case we need to do a crc on the input
*/
if (crc) {
}
}
if (done)
break;
/*
* We can ignore Z_BUF_ERROR because:
* When we call libz_func we can assume that
*
* - avail_in is zero (due to the surrounding code that calls
* flush_libz_buffer)
* - avail_out is non zero due to our actions some lines above
*
* So the only reason for Z_BUF_ERROR is that the internal libz
* buffers are now empty and thus we called libz_func one time
* too often. This does not hurt. It simply says that we are done.
*/
if (zRC == Z_BUF_ERROR) {
break;
}
break;
}
return zRC;
}
{
if (ctx)
return APR_SUCCESS;
}
/* ETag must be unique among the possible representations, so a change
* to content-encoding requires a corresponding change to the ETag.
* This routine appends -transform (e.g., -gzip) to the entity-tag
* value inside the double-quotes if an ETag has already been set
* and its value already contains double-quotes. PR 39727
*/
{
char *d = newtag;
char *e = d + etaglen - 1;
const char *s = etag;
for (; d < e; ++d, ++s) {
*d = *s; /* copy etag to newtag up to last quote */
}
*d++ = '-'; /* append dash to newtag */
s = transform;
e = d + transformlen;
for (; d < e; ++d, ++s) {
*d = *s; /* copy transform to newtag */
}
*d++ = '"'; /* append quote to newtag */
*d = '\0'; /* null terminate newtag */
}
}
}
{
apr_bucket *e;
request_rec *r = f->r;
int zRC;
/* Do nothing if asked to filter nothing. */
if (APR_BRIGADE_EMPTY(bb)) {
}
/* If we don't have a context, we need to ensure that it is okay to send
* the deflated content. If we have a context, that means we've done
* this before and we liked it.
* This could be not so nice if we always fail. But, if we succeed,
* we're in better shape.
*/
if (!ctx) {
char *token;
const char *encoding;
/*
* Only work on main requests (not subrequests) whose response
* is not a 204 with no content, is not tagged with the no-gzip
* env variable, and is not a partial response to a Range request.
*/
) {
}
/* Some browsers might have problems with content types
* other than text/html, so set gzip-only-text/html
* (with browsermatch) for them
*/
if (r->content_type == NULL
"gzip-only-text/html");
}
}
/* Let's see what our current Content-Encoding is.
* If it's already encoded, don't compress again.
* (We could, but let's not.)
*/
if (encoding) {
const char *err_enc;
if (err_enc) {
}
}
else {
}
if (r->content_encoding) {
r->content_encoding, NULL)
: r->content_encoding;
}
if (encoding) {
/* stolen from mod_negotiation: */
}
/* Otherwise, skip token */
if (*tmp) {
++tmp;
}
}
}
/* Even if we don't accept this request based on it not having
* the Accept-Encoding, we need to note that we were looking
* for this header and downstream proxies should be aware of that.
*/
/* force-gzip will just force it out regardless if the browser
* can actually do anything with it.
*/
const char *accepts;
/* if they don't have the line, then they can't play */
}
/* skip parameters, XXX: ;q=foo evaluation? */
while (*accepts == ';') {
++accepts;
}
/* retrieve next token */
if (*accepts == ',') {
++accepts;
}
}
/* No acceptable token found. */
}
}
/* At this point we have decided to filter the content, so change
* important content metadata before sending any response out.
*/
/* If the entire Content-Encoding is "identity", we can replace it. */
}
else {
}
/* Fix r->content_encoding if it was set before */
if (r->content_encoding) {
"Content-Encoding");
}
deflate_check_etag(r, "gzip");
/* For a 304 response, only change the headers */
if (r->status == HTTP_NOT_MODIFIED) {
}
c->windowSize, c->memlevel,
"unable to init Zlib: "
"deflateInit2 returned %d: URL %s",
/*
* Remove ourselves as it does not make sense to return:
* We are not able to init libz and pass data down the chain
* uncompressed.
*/
}
/*
* Register a cleanup function to ensure that we cleanup the internal
* libz resources.
*/
/* add immortal gzip header */
f->c->bucket_alloc);
/* initialize deflate output buffer */
}
while (!APR_BRIGADE_EMPTY(bb))
{
const char *data;
apr_bucket *b;
e = APR_BRIGADE_FIRST(bb);
if (APR_BUCKET_IS_EOS(e)) {
char *buf;
/* flush the remaining data from the zlib buffers */
f->c->bucket_alloc);
"Zlib: Compressed %ld to %ld : URL %s",
/* leave notes for logging */
if (c->note_input_name) {
? apr_off_t_toa(r->pool,
: "-");
}
if (c->note_output_name) {
? apr_off_t_toa(r->pool,
: "-");
}
if (c->note_ratio_name) {
* 100
: "-");
}
/* No need for cleanup any longer */
/* Remove EOS from the old list, and insert into the new. */
/* Okay, we've seen the EOS.
* Time to pass it along down the chain.
*/
}
if (APR_BUCKET_IS_FLUSH(e)) {
/* flush the remaining data from the zlib buffers */
return APR_EGENERAL;
}
/* Remove flush bucket from old brigade and insert into the new. */
if (rv != APR_SUCCESS) {
return rv;
}
continue;
}
if (APR_BUCKET_IS_METADATA(e)) {
/*
* Remove meta data bucket from old brigade and insert into the
* new.
*/
continue;
}
/* read */
/* This crc32 function is from zlib. */
/* write */
* but we'll just have to
* trust zlib */
NULL, f->c->bucket_alloc);
/* Send what we have right now to the next filter. */
if (rv != APR_SUCCESS) {
return rv;
}
}
return APR_EGENERAL;
}
}
}
return APR_SUCCESS;
}
/* This is the deflate input filter (inflates). */
{
request_rec *r = f->r;
int zRC;
/* just get out of the way of things we don't want. */
if (mode != AP_MODE_READBYTES) {
}
if (!ctx) {
char deflate_hdr[10];
if (!ap_is_initial_req(r)) {
}
/* We can't operate on Content-Ranges */
}
/* Check whether request body is gzipped.
*
* If it is, we're transforming the contents, invalidating
* some request headers including Content-Encoding.
*
* If not, we just remove ourself.
*/
}
if (rv != APR_SUCCESS) {
return rv;
}
len = 10;
if (rv != APR_SUCCESS) {
return rv;
}
/* We didn't get the magic bytes. */
if (len != 10 ||
deflate_hdr[0] != deflate_magic[0] ||
return APR_EGENERAL;
}
/* We can't handle flags for now. */
if (deflate_hdr[3] != 0) {
return APR_EGENERAL;
}
"unable to init Zlib: "
"inflateInit2 returned %d: URL %s",
}
/* initialize deflate output buffer */
}
if (rv != APR_SUCCESS) {
/* What about APR_EAGAIN errors? */
return rv;
}
{
const char *data;
/* If we actually see the EOS, that means we screwed up! */
if (APR_BUCKET_IS_EOS(bkt)) {
return APR_EGENERAL;
}
if (APR_BUCKET_IS_FLUSH(bkt)) {
return APR_EGENERAL;
}
NULL, f->c->bucket_alloc);
/* Move everything to the returning brigade. */
break;
}
/* read */
/* pass through zlib inflate. */
NULL, f->c->bucket_alloc);
}
if (zRC == Z_STREAM_END) {
break;
}
return APR_EGENERAL;
}
}
if (zRC == Z_STREAM_END) {
"Zlib: Inflated %ld to %ld : URL %s",
r->uri);
NULL, f->c->bucket_alloc);
/* Are the remaining 8 bytes already in the avail stream? */
return APR_EGENERAL;
}
return APR_EGENERAL;
}
}
else {
/* FIXME: We need to grab the 8 verification bytes
* from the wire! */
return APR_EGENERAL;
}
break;
}
}
}
/* If we are about to return nothing for a 'blocking' read and we have
* some data in our zlib buffer, flush it out so we can return something.
*/
if (block == APR_BLOCK_READ &&
NULL, f->c->bucket_alloc);
}
/* May return APR_INCOMPLETE which is fine by us. */
}
return APR_SUCCESS;
}
/* Filter to inflate for a content-transforming proxy. */
{
int zlib_method;
int zlib_flags;
apr_bucket *e;
request_rec *r = f->r;
int zRC;
/* Do nothing if asked to filter nothing. */
if (APR_BRIGADE_EMPTY(bb)) {
}
if (!ctx) {
/*
* Only work on main requests (not subrequests) whose response
* is not a 204 with no content and is not a partial response
* to a Range request, and only when Content-Encoding ends in gzip.
*/
) {
}
/*
* At this point we have decided to filter the content, so change
* important content metadata before sending any response out.
* Content-Encoding was already reset by the check_gzip() call.
*/
deflate_check_etag(r, "gunzip");
/* For a 304 response, only change the headers */
if (r->status == HTTP_NOT_MODIFIED) {
}
ctx->validation_buffer_length = 0;
"unable to init Zlib: "
"inflateInit2 returned %d: URL %s",
/*
* Remove ourselves as it does not make sense to return:
* We are not able to init libz and pass data down the chain
* compressed.
*/
}
/*
* Register a cleanup function to ensure that we cleanup the internal
* libz resources.
*/
/* initialize inflate output buffer */
ctx->inflate_init = 0;
}
while (!APR_BRIGADE_EMPTY(bb))
{
const char *data;
apr_bucket *b;
e = APR_BRIGADE_FIRST(bb);
if (APR_BUCKET_IS_EOS(e)) {
/*
* We are really done now. Ensure that we never return here, even
* if a second EOS bucket falls down the chain. Thus remove
* ourselves.
*/
/* should be zero already anyway */
/*
* Flush the remaining data from the zlib buffers. It is correct
* to use Z_SYNC_FLUSH in this case and not Z_FINISH as in the
* deflate case. In the inflate case, Z_FINISH requires an output
* buffer large enough to hold ALL the data, otherwise it fails,
* whereas in the deflate case you can empty a filled output
* buffer and call it again until no more output can be created.
*/
"Zlib: Inflated %ld to %ld : URL %s",
"Zlib: Checksum of inflated stream invalid");
return APR_EGENERAL;
}
"Zlib: Length of inflated stream invalid");
return APR_EGENERAL;
}
}
else {
"Zlib: Validation bytes not present");
return APR_EGENERAL;
}
/* No need for cleanup any longer */
/* Remove EOS from the old list, and insert into the new. */
/*
* Okay, we've seen the EOS.
* Time to pass it along down the chain.
*/
}
if (APR_BUCKET_IS_FLUSH(e)) {
/* flush the remaining data from the zlib buffers */
return APR_EGENERAL;
}
/* Remove flush bucket from old brigade and insert into the new. */
if (rv != APR_SUCCESS) {
return rv;
}
continue;
}
if (APR_BUCKET_IS_METADATA(e)) {
/*
* Remove meta data bucket from old brigade and insert into the
* new.
*/
continue;
}
/* read */
/* first bucket contains zlib header */
if (!ctx->inflate_init++) {
if (len < 10) {
"Insufficient data for inflate");
return APR_EGENERAL;
}
else {
if (zlib_method != Z_DEFLATED) {
"inflate: data not deflated!");
}
if (data[0] != deflate_magic[0] ||
(zlib_flags & RESERVED) != 0) {
"inflate: bad header");
return APR_EGENERAL ;
}
data += 10 ;
len -= 10 ;
}
if (zlib_flags & EXTRA_FIELD) {
bytes += 2;
"inflate: extra field too big (not "
"supported)");
return APR_EGENERAL;
}
}
if (zlib_flags & ORIG_NAME) {
}
if (zlib_flags & COMMENT) {
}
if (zlib_flags & HEAD_CRC) {
len -= 2;
data += 2;
}
}
/* pass through zlib inflate. */
if (ctx->validation_buffer) {
/* Saved copy_size bytes */
}
"Zlib: %d bytes of garbage at the end of "
/*
* There is nothing worth consuming for zlib left, because it is
* either garbage data or the data has been copied to the
* validation buffer (processing validation data is no business
* for zlib). So set ctx->stream.avail_in to zero to indicate
* this to the following while loop.
*/
}
}
NULL, f->c->bucket_alloc);
/* Send what we have right now to the next filter. */
if (rv != APR_SUCCESS) {
return rv;
}
}
if (zRC == Z_STREAM_END) {
/*
* We have inflated all data. Now try to capture the
* validation bytes. We may not have them all available
* right now, but capture what is there.
*/
"Zlib: %d bytes of garbage at the end of "
"compressed stream.",
}
if (ctx->validation_buffer_length)
break;
}
return APR_EGENERAL;
}
}
}
return APR_SUCCESS;
}
/*
* Hook-registration callback; it is listed as the "register hooks" entry
* of the module record at the end of this file.
*
* p: pool passed in by the caller; unused in the body as seen here.
*
* NOTE(review): no registrations appear in this body -- confirm that the
* filter registrations have not been lost.
*/
static void register_hooks(apr_pool_t *p)
{
}
static const command_rec deflate_filter_cmds[] = {
"Set a note to report on compression ratio"),
RSRC_CONF, "Set the Deflate window size (1-15)"),
"Set the Deflate Buffer Size"),
"Set the Deflate Memory Level (1-9)"),
"Set the Deflate Compression Level (1-9)"),
{NULL}
};
NULL, /* dir config creater */
NULL, /* dir merger --- default is to override */
create_deflate_server_config, /* server config */
NULL, /* merge server config */
deflate_filter_cmds, /* command table */
register_hooks /* register hooks */
};