mod_disk_cache.c revision a14ccf0f7e9b44c6848334823542a1799577f669
/* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#include "apr_file_io.h"
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#include "apr_strings.h"
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#include "mod_cache.h"
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#include "ap_provider.h"
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#include "util_filter.h"
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#include "util_script.h"
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#include "util_charset.h"
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard
/*
 * mod_disk_cache: Disk Based HTTP 1.1 Cache.
 *
 * Flow to find the .data file:
 *   Incoming client requests URI /foo/bar/baz
 *   Generate <hash> off of /foo/bar/baz
 *   Open <hash>.header
 *   Read in <hash>.header file (may contain Format #1 or Format #2)
 *   If format #1 (contains a list of Vary headers):
 *      Use each header name (from .header) with our request values
 *      (headers_in) to regenerate <hash> using
 *      HeaderName+HeaderValue+.../foo/bar/baz
 *      Re-read in <hash>.header (must be format #2)
 *   Read in <hash>.data
 *
 * Format #1:
 *   apr_uint32_t format;
 *   apr_time_t expire;
 *   apr_array_t vary_headers (delimited by CRLF)
 *
 * Format #2:
 *   disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
 *   entity name (dobj->name) [length is in disk_cache_info_t->name_len]
 *   r->headers_out (delimited by CRLF)
 *   CRLF
 *   r->headers_in (delimited by CRLF)
 *   CRLF
 */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#define VARY_FORMAT_VERSION 1
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#define DISK_FORMAT_VERSION 2
f84cab2da5f8958575b1ce99ca2bf4fda34cecb6mturk
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddardtypedef struct {
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard /* Indicates the format of the header struct stored on-disk. */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard apr_uint32_t format;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard /* The HTTP status code returned for this response. */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard int status;
f84cab2da5f8958575b1ce99ca2bf4fda34cecb6mturk /* The size of the entity name that follows. */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard apr_size_t name_len;
f84cab2da5f8958575b1ce99ca2bf4fda34cecb6mturk /* The number of times we've cached this entity. */
fb929740692fe796b467212ec57aa34cebb562cfwrowe apr_size_t entity_version;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard /* Miscellaneous time values. */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard apr_time_t date;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard apr_time_t expire;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard apr_time_t request_time;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard apr_time_t response_time;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard} disk_cache_info_t;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard/*
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard * disk_cache_object_t
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard * Pointed to by cache_object_t::vobj
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddardtypedef struct disk_cache_object {
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard const char *root; /* the location of the cache directory */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard char *tempfile; /* temp file tohold the content */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#if 0
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard int dirlevels; /* Number of levels of subdirectories */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard int dirlength; /* Length of subdirectory names */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#endif
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard char *datafile; /* name of file where the data will go */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard char *hdrsfile; /* name of file where the hdrs will go */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard char *hashfile; /* Computed hash key for this URI */
83cb9e783386a18eecdb0749d9d17aa8e3bc012estoddard char *name; /* Requested URI without vary bits - suitable for mortals. */
83cb9e783386a18eecdb0749d9d17aa8e3bc012estoddard char *key; /* On-disk prefix; URI with Vary bits (if present) */
83cb9e783386a18eecdb0749d9d17aa8e3bc012estoddard apr_file_t *fd; /* data file */
83cb9e783386a18eecdb0749d9d17aa8e3bc012estoddard apr_file_t *hfd; /* headers file */
43681d859fe7d4cd90a33d6be2232192c108de75wrowe apr_file_t *tfd; /* temporary file for data */
43681d859fe7d4cd90a33d6be2232192c108de75wrowe apr_off_t file_size; /* File size of the cached data file */
43681d859fe7d4cd90a33d6be2232192c108de75wrowe disk_cache_info_t disk_info; /* Header information. */
43681d859fe7d4cd90a33d6be2232192c108de75wrowe} disk_cache_object_t;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard/*
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard * mod_disk_cache configuration
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard/* TODO: Make defaults OS specific */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#define CACHEFILE_LEN 20 /* must be less than HASH_LEN/2 */
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#define DEFAULT_DIRLEVELS 3
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#define DEFAULT_DIRLENGTH 2
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard#define DEFAULT_MIN_FILE_SIZE 1
43681d859fe7d4cd90a33d6be2232192c108de75wrowe#define DEFAULT_MAX_FILE_SIZE 1000000
43681d859fe7d4cd90a33d6be2232192c108de75wrowe
43681d859fe7d4cd90a33d6be2232192c108de75wrowetypedef struct {
43681d859fe7d4cd90a33d6be2232192c108de75wrowe const char* cache_root;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard apr_size_t cache_root_len;
8e117661fd51fd19d6430fca8d7ae87c67d6de20stoddard int dirlevels; /* Number of levels of subdirectories */
int dirlength; /* Length of subdirectory names */
apr_size_t minfs; /* minumum file size for cached files */
apr_size_t maxfs; /* maximum file size for cached files */
} disk_cache_conf;
/* Module record; the definition with its initialiser is at the end of this
 * file. It is needed early because ap_get_module_config() lookups take its
 * address. */
module AP_MODULE_DECLARE_DATA disk_cache_module;
/* Forward declarations (read_array(), for instance, is called by
 * open_entity() before its definition appears). */
static int remove_entity(cache_handle_t *h);
static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *b);
static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
apr_file_t *file);
/*
 * Local static functions
 */
/* Suffixes appended to the hashed name by header_file() and data_file(). */
#define CACHE_HEADER_SUFFIX ".header"
#define CACHE_DATA_SUFFIX ".data"
/*
 * Return "<cache_root>/<hash>.header" for the given cache key.
 *
 * The hashed name is computed from `name` only when dobj->hashfile is not
 * set yet; the result is remembered in dobj->hashfile so that data_file()
 * reuses it. All strings are allocated from pool `p`.
 */
static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
                         disk_cache_object_t *dobj, const char *name)
{
    if (dobj->hashfile == NULL) {
        dobj->hashfile = ap_cache_generate_name(p,
                                                conf->dirlevels,
                                                conf->dirlength,
                                                name);
    }

    return apr_pstrcat(p,
                       conf->cache_root, "/",
                       dobj->hashfile, CACHE_HEADER_SUFFIX,
                       NULL);
}
/*
 * Return "<cache_root>/<hash>.data" for the given cache key.
 *
 * Shares the cached hash in dobj->hashfile with header_file(): the hash is
 * generated from `name` only if dobj->hashfile is still NULL.
 */
static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
                       disk_cache_object_t *dobj, const char *name)
{
    if (dobj->hashfile == NULL) {
        dobj->hashfile = ap_cache_generate_name(p,
                                                conf->dirlevels,
                                                conf->dirlength,
                                                name);
    }

    return apr_pstrcat(p,
                       conf->cache_root, "/",
                       dobj->hashfile, CACHE_DATA_SUFFIX,
                       NULL);
}
/*
 * Create every directory on the path of `file` that lies below the cache
 * root.
 *
 * `file` is edited in place while walking: each '/' after the cache-root
 * prefix is temporarily replaced by a NUL so that the prefix can be passed
 * to apr_dir_make(), then restored. The final path component (the file
 * name itself) is never created. Directory-creation failures are ignored
 * here (XXX: they should be reported).
 */
static void mkdir_structure(disk_cache_conf *conf, char *file, apr_pool_t *pool)
{
    /* Skip "<cache_root>/" so the root itself is never created. */
    char *sep = strchr(file + conf->cache_root_len + 1, '/');

    while (sep != NULL) {
        *sep = '\0';
        (void) apr_dir_make(file,
                            APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool);
        *sep = '/';
        sep = strchr(sep + 1, '/');
    }
}
/*
 * Finish a cached body: close the temporary data file and rename it to its
 * final name (dobj->datafile).
 *
 * Does nothing and returns APR_SUCCESS when no temp file is open.
 * If closing (which flushes buffered data) or renaming fails, the failure is
 * logged, the temp file is deleted so it does not accumulate in the cache
 * root, and the error status is returned. dobj->tfd is always NULL on
 * return.
 */
static apr_status_t file_cache_el_final(disk_cache_object_t *dobj,
                                        request_rec *r)
{
    apr_status_t rv;

    if (!dobj->tfd) {
        return APR_SUCCESS;
    }

    rv = apr_file_close(dobj->tfd);
    dobj->tfd = NULL;
    if (rv != APR_SUCCESS) {
        /* The body may be truncated; never move it into place. */
        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
                     "disk_cache: closing tempfile failed: %s",
                     dobj->tempfile);
        apr_file_remove(dobj->tempfile, r->pool);
        return rv;
    }

    /* This assumes that the tempfile is on the same file system
     * as the cache_root. If not, then we need a file copy/move
     * rather than a rename.
     */
    rv = apr_file_rename(dobj->tempfile, dobj->datafile, r->pool);
    if (rv != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
                     "disk_cache: rename tempfile to datafile failed: %s -> %s",
                     dobj->tempfile, dobj->datafile);
        apr_file_remove(dobj->tempfile, r->pool);
        return rv;
    }

    return APR_SUCCESS;
}
/*
 * Throw away everything stored so far for this entity: the header file, the
 * data file and, if one is open, the temporary data file (which is closed
 * first). Results of the individual removals are not examined; the function
 * always returns APR_SUCCESS.
 */
static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, request_rec *r)
{
    apr_pool_t *pool = r->pool;

    apr_file_remove(dobj->hdrsfile, pool);
    apr_file_remove(dobj->datafile, pool);

    if (dobj->tfd != NULL) {
        apr_file_close(dobj->tfd);
        apr_file_remove(dobj->tempfile, pool);
        dobj->tfd = NULL;
    }

    return APR_SUCCESS;
}
/*
 * Read the disk_cache_info_t record and the entity name from the start of
 * an open header file and validate them against the entity being opened.
 *
 * fd    header file, positioned at offset 0
 * info  receives status, date, expire, request_time and response_time
 * dobj  dobj->name must already hold the requested name; dobj->disk_info
 *       receives a copy of the record
 * r     request whose pool is used for the temporary name buffer
 *
 * Returns APR_SUCCESS when the record is a DISK_FORMAT_VERSION record whose
 * stored name equals dobj->name; the read error if the file is too short;
 * APR_EGENERAL on a format or name mismatch. On failure neither `info` nor
 * dobj->disk_info is modified.
 */
static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
                                    disk_cache_object_t *dobj, request_rec *r)
{
    apr_status_t rv;
    char *urlbuff;
    disk_cache_info_t disk_info;
    apr_size_t len;

    /* read the data from the cache file */
    len = sizeof(disk_cache_info_t);
    rv = apr_file_read_full(fd, &disk_info, len, &len);
    if (rv != APR_SUCCESS) {
        return rv;
    }

    /* The file reached through a Vary lookup has not had its format word
     * checked by the caller, so verify it here before trusting the rest. */
    if (disk_info.format != DISK_FORMAT_VERSION) {
        return APR_EGENERAL;
    }

    /* A stored name of a different length can never match, and checking
     * first keeps a corrupt name_len from sizing the allocation below. */
    if (disk_info.name_len != strlen(dobj->name)) {
        return APR_EGENERAL;
    }

    urlbuff = apr_palloc(r->pool, disk_info.name_len + 1);
    len = disk_info.name_len;
    rv = apr_file_read_full(fd, urlbuff, len, &len);
    if (rv != APR_SUCCESS) {
        return rv;
    }
    urlbuff[disk_info.name_len] = '\0';

    /* check that we have the same URL */
    if (strcmp(urlbuff, dobj->name) != 0) {
        return APR_EGENERAL;
    }

    /* Store it away so we can get it later. */
    dobj->disk_info = disk_info;

    info->status = disk_info.status;
    info->date = disk_info.date;
    info->expire = disk_info.expire;
    info->request_time = disk_info.request_time;
    info->response_time = disk_info.response_time;

    return APR_SUCCESS;
}
/*
 * Build the cache key for a response that varies on request headers.
 *
 * For every header name in `varray` (in array order) the name and the value
 * found in `headers` are appended; a header that is absent contributes an
 * empty value. `oldkey` is appended last. The result is a new string
 * allocated from `p`.
 *
 * TODO:
 *    - Handle multiple-value headers better. (sort them?)
 *    - Handle case-insensitive values better.
 *        This isn't the end of the world, since it just lowers the cache
 *        hit rate, but it would be nice to fix.
 *
 * The majority are case insensitive if they are values (encoding etc).
 * Most of rfc2616 is case insensitive on header contents.
 *
 * So the better solution may be to identify headers which should be
 * treated case-sensitive?
 *  HTTP URI's (3.2.3) [host and scheme are insensitive]
 *  HTTP method (5.1.1)
 *  HTTP-date values (3.3.1)
 *  3.7 Media Types [excerpt]
 *     The type, subtype, and parameter attribute names are case-
 *     insensitive. Parameter values might or might not be case-sensitive,
 *     depending on the semantics of the parameter name.
 *  14.20 Expect [excerpt]
 *     Comparison of expectation values is case-insensitive for unquoted
 *     tokens (including the 100-continue token), and is case-sensitive for
 *     quoted-string expectation-extensions.
 */
static char* regen_key(apr_pool_t *p, apr_table_t *headers,
                       apr_array_header_t *varray, const char *oldkey)
{
    const char **names = (const char **) varray->elts;
    /* Two pieces (name, value) per header plus one for the old key. */
    int total = (varray->nelts * 2) + 1;
    struct iovec *pieces = apr_palloc(p, sizeof(struct iovec) * total);
    struct iovec *slot = pieces;
    int idx;

    for (idx = 0; idx < varray->nelts; idx++) {
        const char *value = apr_table_get(headers, names[idx]);

        if (value == NULL) {
            value = "";
        }

        slot->iov_base = (char*) names[idx];
        slot->iov_len = strlen(names[idx]);
        slot++;

        slot->iov_base = (char*) value;
        slot->iov_len = strlen(value);
        slot++;
    }

    slot->iov_base = (char*) oldkey;
    slot->iov_len = strlen(oldkey);

    return apr_pstrcatv(p, pieces, total, NULL);
}
/*
 * qsort() comparator for an array of C strings: each argument points at an
 * array element (a char pointer) and the strings are ordered by strcmp().
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *lhs = *(const char * const *) fn1;
    const char *rhs = *(const char * const *) fn2;

    return strcmp(lhs, rhs);
}
/*
 * Split a comma-separated header value into its list items, append each
 * item (as returned by ap_get_list_item(), allocated from `p`) to `arr`,
 * then sort the whole array with array_alphasort().
 */
static void tokens_to_array(apr_pool_t *p, const char *data,
                            apr_array_header_t *arr)
{
    for (;;) {
        char *item = ap_get_list_item(p, &data);

        if (item == NULL) {
            break;
        }
        *((const char **) apr_array_push(arr)) = item;
    }

    /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
    qsort((void *) arr->elts, arr->nelts,
          sizeof(char *), array_alphasort);
}
/*
 * Hook and mod_cache callback functions
 */

/* Template (relative to the cache root) handed to apr_file_mktemp(). */
#define AP_TEMPFILE "/aptmpXXXXXX"

/*
 * Prepare a new cache entity for `key`.
 *
 * Returns DECLINED when no CacheRoot is configured. Otherwise allocates the
 * cache object and its disk-specific part from r->pool, records a private
 * copy of the key as the object key and name, derives the data, header and
 * temp file names, and returns OK. No file is created here; `len` is not
 * used.
 */
static int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len)
{
    disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
                                                 &disk_cache_module);
    cache_object_t *obj;
    disk_cache_object_t *dobj;

    if (!conf->cache_root) {
        return DECLINED;
    }

    /* Both structures start out zeroed. */
    obj = apr_pcalloc(r->pool, sizeof(*obj));
    h->cache_obj = obj;
    dobj = apr_pcalloc(r->pool, sizeof(*dobj));
    obj->vobj = dobj;

    obj->key = apr_pstrdup(r->pool, key);
    dobj->name = obj->key;

    dobj->datafile = data_file(r->pool, conf, dobj, key);
    dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);

    return OK;
}
/*
 * Try to open the cached entity for `key`.
 *
 * Opens "<hash>.header" and looks at its leading format word:
 *   - VARY_FORMAT_VERSION: the file lists the header names the response
 *     varies on. If it has not expired, a new key is built from those
 *     headers' values in this request (regen_key) and the header file for
 *     that key is opened instead.
 *   - DISK_FORMAT_VERSION: the file is rewound and used directly.
 *   - anything else: logged as a version mismatch.
 * Then the data file is opened, its size recorded, and the header record is
 * read and validated by file_cache_recall_mydata().
 *
 * Returns OK with h->cache_obj populated and dobj->hfd/dobj->fd open, or
 * DECLINED. Every read and seek is checked, so a truncated header file is
 * declined rather than interpreted from uninitialised memory; on DECLINED
 * any file opened here has been closed again.
 */
static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
{
    apr_uint32_t format = 0;
    apr_size_t len;
    char *nkey;
    apr_status_t rc;
    static int error_logged = 0;
    disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
                                                 &disk_cache_module);
    apr_finfo_t finfo;
    cache_object_t *obj;
    cache_info *info;
    disk_cache_object_t *dobj;
    int flags;

    h->cache_obj = NULL;

    /* Look up entity keyed to 'url' */
    if (conf->cache_root == NULL) {
        if (!error_logged) {
            error_logged = 1;
            ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
                 "disk_cache: Cannot cache files to disk without a CacheRoot specified.");
        }
        return DECLINED;
    }

    /* Create and init the cache object */
    h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
    obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));

    info = &(obj->info);

    /* Open the headers file */
    dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
    flags = APR_READ|APR_BINARY|APR_BUFFERED;
    rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
    if (rc != APR_SUCCESS) {
        dobj->hfd = NULL;
        return DECLINED;
    }

    /* read the format from the cache file */
    len = sizeof(format);
    rc = apr_file_read_full(dobj->hfd, &format, len, &len);
    if (rc != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
                     "disk_cache: Cannot read format of header file: %s",
                     dobj->hdrsfile);
        goto decline;
    }

    if (format == VARY_FORMAT_VERSION) {
        apr_array_header_t* varray;
        apr_time_t expire;

        len = sizeof(expire);
        rc = apr_file_read_full(dobj->hfd, &expire, len, &len);
        if (rc != APR_SUCCESS) {
            ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
                         "disk_cache: Cannot parse vary header file: %s",
                         dobj->hdrsfile);
            goto decline;
        }

        if (expire < r->request_time) {
            goto decline;
        }

        varray = apr_array_make(r->pool, 5, sizeof(char*));
        rc = read_array(r, varray, dobj->hfd);
        if (rc != APR_SUCCESS) {
            ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
                         "disk_cache: Cannot parse vary header file: %s",
                         dobj->hdrsfile);
            goto decline;
        }
        apr_file_close(dobj->hfd);
        /* Do not keep a pointer to the closed file around. */
        dobj->hfd = NULL;

        nkey = regen_key(r->pool, r->headers_in, varray, key);

        /* Force header_file()/data_file() to hash the new key. */
        dobj->hashfile = NULL;
        dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey);

        flags = APR_READ|APR_BINARY|APR_BUFFERED;
        rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
        if (rc != APR_SUCCESS) {
            dobj->hfd = NULL;
            return DECLINED;
        }
    }
    else if (format != DISK_FORMAT_VERSION) {
        ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
                     "cache_disk: File '%s' has a version mismatch. File had version: %d.",
                     dobj->hdrsfile, format);
        goto decline;
    }
    else {
        apr_off_t offset = 0;
        /* This wasn't a Vary Format file, so we must seek to the
         * start of the file again, so that later reads work.
         */
        rc = apr_file_seek(dobj->hfd, APR_SET, &offset);
        if (rc != APR_SUCCESS) {
            goto decline;
        }
        nkey = (char*)key;
    }

    obj->key = nkey;
    dobj->key = nkey;
    dobj->name = (char*)key;
    dobj->datafile = data_file(r->pool, conf, dobj, nkey);
    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);

    /* Open the data file */
    flags = APR_READ|APR_BINARY;
#ifdef APR_SENDFILE_ENABLED
    flags |= APR_SENDFILE_ENABLED;
#endif
    rc = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool);
    if (rc != APR_SUCCESS) {
        /* XXX: Log message */
        dobj->fd = NULL;
        goto decline;
    }

    /* If the size cannot be determined file_size keeps its zeroed value. */
    rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd);
    if (rc == APR_SUCCESS) {
        dobj->file_size = finfo.size;
    }

    /* Read the bytes to setup the cache_info fields */
    rc = file_cache_recall_mydata(dobj->hfd, info, dobj, r);
    if (rc != APR_SUCCESS) {
        /* XXX log message */
        goto decline;
    }

    /* Initialize the cache_handle callback functions */
    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                 "disk_cache: Recalled cached URL info header %s",  dobj->name);
    return OK;

decline:
    /* Release whatever was opened so far; both pointers start out NULL. */
    if (dobj->fd) {
        apr_file_close(dobj->fd);
        dobj->fd = NULL;
    }
    if (dobj->hfd) {
        apr_file_close(dobj->hfd);
        dobj->hfd = NULL;
    }
    return DECLINED;
}
/*
 * Detach the cache object from the handle so that the next operation on the
 * handle starts from scratch. Nothing on disk is touched. Always returns OK.
 */
static int remove_entity(cache_handle_t *h)
{
    h->cache_obj = NULL;

    return OK;
}
/*
 * Placeholder: removing a URL from the cache is not implemented yet, so the
 * key is ignored and OK is returned unconditionally.
 */
static int remove_url(const char *key)
{
    /* XXX: Delete file from cache! */
    (void) key;

    return OK;
}
/*
 * Read CRLF- or LF-terminated lines from `file` until an empty line is
 * found, appending a copy of each non-empty line (allocated from r->pool,
 * line terminator removed) to `arr`.
 *
 * Returns APR_SUCCESS once the terminating empty line has been read. If a
 * read fails first (including end of file), the failure is logged together
 * with its status and that status is returned; entries appended so far stay
 * in `arr`.
 */
static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
                               apr_file_t *file)
{
    char w[MAX_STRING_LEN];
    int p;
    apr_status_t rv;

    while (1) {
        rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
        if (rv != APR_SUCCESS) {
            /* Pass rv along so the log shows why the read failed. */
            ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
                          "Premature end of vary array.");
            return rv;
        }

        /* Strip the trailing LF, or CR LF, if there is one. */
        p = strlen(w);
        if (p > 0 && w[p - 1] == '\n') {
            if (p > 1 && w[p - 2] == CR) {
                w[p - 2] = '\0';
            }
            else {
                w[p - 1] = '\0';
            }
        }

        /* If we've finished reading the array, break out of the loop. */
        if (w[0] == '\0') {
            break;
        }

        *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
    }

    return APR_SUCCESS;
}
/*
 * Write every string of `arr` to `fd`, each followed by CRLF, and finish
 * with one extra CRLF (an empty line) that marks the end of the array.
 *
 * Returns the status of the first write that fails, otherwise the status of
 * the final CRLF write.
 */
static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
{
    const char **items = (const char **) arr->elts;
    struct iovec vec[2];
    apr_size_t nwritten;
    int idx;

    /* The second piece of every line is always the line terminator. */
    vec[1].iov_base = CRLF;
    vec[1].iov_len = sizeof(CRLF) - 1;

    for (idx = 0; idx < arr->nelts; idx++) {
        apr_status_t status;

        vec[0].iov_base = (char*) items[idx];
        vec[0].iov_len = strlen(items[idx]);

        status = apr_file_writev(fd, (const struct iovec *) &vec, 2,
                                 &nwritten);
        if (status != APR_SUCCESS) {
            return status;
        }
    }

    vec[0].iov_base = CRLF;
    vec[0].iov_len = sizeof(CRLF) - 1;

    return apr_file_writev(fd, (const struct iovec *) &vec, 1,
                           &nwritten);
}
/*
 * Read "Name: value" lines from `file` into `table` until an empty line.
 *
 * Each line has its trailing LF or CR LF removed, is split at the first
 * ':' and is added with apr_table_add() (so repeated names are kept as
 * separate entries) after skipping whitespace that follows the colon.
 *
 * Returns APR_SUCCESS when the empty terminating line was read, the
 * apr_file_gets() status if a read fails first (after logging), or
 * APR_EGENERAL if a non-empty line contains no ':'.
 * The `handle` parameter is not used in this function.
 */
static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
apr_table_t *table, apr_file_t *file)
{
char w[MAX_STRING_LEN];
char *l;
int p;
apr_status_t rv;
while (1) {
/* ### What about APR_EOF? */
rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
"Premature end of cache headers.");
return rv;
}
/* Delete terminal (CR?)LF */
p = strlen(w);
/* Indeed, the host's '\n':
'\012' for UNIX; '\015' for MacOS; '\025' for OS/390
-- whatever the script generates.
*/
if (p > 0 && w[p - 1] == '\n') {
if (p > 1 && w[p - 2] == CR) {
w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the headers, break out of the loop. */
if (w[0] == '\0') {
break;
}
#if APR_CHARSET_EBCDIC
/* Chances are that we received an ASCII header text instead of
 * the expected EBCDIC header lines. Try to auto-detect:
 */
/* Only lines without a ':' in the native charset are examined. */
if (!(l = strchr(w, ':'))) {
int maybeASCII = 0, maybeEBCDIC = 0;
unsigned char *cp, native;
apr_size_t inbytes_left, outbytes_left;
/* Count bytes that are printable only before, or only after,
 * conversion with ap_hdrs_from_ascii. */
for (cp = w; *cp != '\0'; ++cp) {
native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
if (apr_isprint(*cp) && !apr_isprint(native))
++maybeEBCDIC;
if (!apr_isprint(*cp) && apr_isprint(native))
++maybeASCII;
}
/* More bytes look like ASCII: log it and convert the line in place. */
if (maybeASCII > maybeEBCDIC) {
ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
"CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
r->filename);
inbytes_left = outbytes_left = cp - w;
apr_xlate_conv_buffer(ap_hdrs_from_ascii,
w, &inbytes_left, w, &outbytes_left);
}
}
#endif /*APR_CHARSET_EBCDIC*/
/* if we see a bogus header don't ignore it. Shout and scream */
if (!(l = strchr(w, ':'))) {
return APR_EGENERAL;
}
/* Terminate the name at the colon; l now points at the value. */
*l++ = '\0';
while (*l && apr_isspace(*l)) {
++l;
}
apr_table_add(table, w, l);
}
return APR_SUCCESS;
}
/*
 * Load the stored response and request headers of an opened entity.
 *
 * Two header blocks are read from dobj->hfd with read_table(), in the order
 * store_headers() wrote them: first the response headers (into
 * h->resp_hdrs), then the request headers (into h->req_hdrs). Both tables
 * are freshly created from r->pool. The header file is closed afterwards
 * and dobj->hfd is reset to NULL so the closed handle cannot be reused.
 *
 * Returns APR_NOTFOUND when no header file is open, the read_table() error
 * when either block is missing or malformed, otherwise APR_SUCCESS.
 *
 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
 * Is that okay, or should they be collapsed where possible?
 */
static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
{
    disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
    apr_status_t rv;

    /* This case should not happen... */
    if (!dobj->hfd) {
        /* XXX log message */
        return APR_NOTFOUND;
    }

    h->req_hdrs = apr_table_make(r->pool, 20);
    h->resp_hdrs = apr_table_make(r->pool, 20);

    /* Call routine to read the header lines/status line */
    rv = read_table(h, r, h->resp_hdrs, dobj->hfd);
    if (rv == APR_SUCCESS) {
        rv = read_table(h, r, h->req_hdrs, dobj->hfd);
    }

    apr_file_close(dobj->hfd);
    dobj->hfd = NULL;

    if (rv != APR_SUCCESS) {
        /* Do not hand out a partially filled header set as a success. */
        ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
                     "disk_cache: Cannot read cached headers for URL %s",
                     dobj->name);
        return rv;
    }

    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                 "disk_cache: Recalled headers for URL %s",  dobj->name);
    return APR_SUCCESS;
}
/*
 * Hand the cached body to the caller: a file bucket covering
 * dobj->file_size bytes of the open data file is placed at the head of
 * `bb` and an EOS bucket at its tail. Always returns APR_SUCCESS.
 */
static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb)
{
    disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
    apr_bucket *body;
    apr_bucket *eos;

    body = apr_bucket_file_create(dobj->fd, 0,
                                  (apr_size_t) dobj->file_size, p,
                                  bb->bucket_alloc);
    APR_BRIGADE_INSERT_HEAD(bb, body);

    eos = apr_bucket_eos_create(bb->bucket_alloc);
    APR_BRIGADE_INSERT_TAIL(bb, eos);

    return APR_SUCCESS;
}
/*
 * Write `table` to `fd` as "key: value" lines terminated by CRLF, followed
 * by one empty CRLF line that ends the block. Entries whose key is NULL are
 * skipped.
 *
 * Returns the status of the first failing write, otherwise the status of
 * the final CRLF write.
 */
static apr_status_t store_table(apr_file_t *fd, apr_table_t *table)
{
    const apr_array_header_t *entries = apr_table_elts(table);
    apr_table_entry_t *entry = (apr_table_entry_t *) entries->elts;
    struct iovec vec[4];
    apr_size_t nwritten;
    apr_status_t status;
    int idx;

    /* The separator and the line terminator never change. */
    vec[1].iov_base = ": ";
    vec[1].iov_len = sizeof(": ") - 1;
    vec[3].iov_base = CRLF;
    vec[3].iov_len = sizeof(CRLF) - 1;

    for (idx = 0; idx < entries->nelts; ++idx, ++entry) {
        if (entry->key == NULL) {
            continue;
        }

        vec[0].iov_base = entry->key;
        vec[0].iov_len = strlen(entry->key);
        vec[2].iov_base = entry->val;
        vec[2].iov_len = strlen(entry->val);

        status = apr_file_writev(fd, (const struct iovec *) &vec, 4,
                                 &nwritten);
        if (status != APR_SUCCESS) {
            return status;
        }
    }

    vec[0].iov_base = CRLF;
    vec[0].iov_len = sizeof(CRLF) - 1;
    status = apr_file_writev(fd, (const struct iovec *) &vec, 1,
                             &nwritten);
    return status;
}
static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info)
{
disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
&disk_cache_module);
apr_status_t rv;
apr_size_t amt;
disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
disk_cache_info_t disk_info;
struct iovec iov[2];
/* This is flaky... we need to manage the cache_info differently */
h->cache_obj->info = *info;
if (r->headers_out) {
const char *tmp;
tmp = apr_table_get(r->headers_out, "Vary");
if (tmp) {
apr_array_header_t* varray;
apr_uint32_t format = VARY_FORMAT_VERSION;
mkdir_structure(conf, dobj->hdrsfile, r->pool);
rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
r->pool);
if (rv != APR_SUCCESS) {
return rv;
}
amt = sizeof(format);
apr_file_write(dobj->tfd, &format, &amt);
amt = sizeof(info->expire);
apr_file_write(dobj->tfd, &info->expire, &amt);
varray = apr_array_make(r->pool, 6, sizeof(char*));
tokens_to_array(r->pool, tmp, varray);
store_array(dobj->tfd, varray);
apr_file_close(dobj->tfd);
dobj->tfd = NULL;
rv = apr_file_rename(dobj->tempfile, dobj->hdrsfile, r->pool);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
"disk_cache: rename tempfile to varyfile failed: %s -> %s",
dobj->tempfile, dobj->hdrsfile);
return rv;
}
dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
dobj->hashfile = NULL;
dobj->datafile = data_file(r->pool, conf, dobj, tmp);
dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp);
}
}
rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
APR_CREATE | APR_WRITE | APR_BINARY |
APR_BUFFERED | APR_EXCL, r->pool);
if (rv != APR_SUCCESS) {
return rv;
}
dobj->name = h->cache_obj->key;
disk_info.format = DISK_FORMAT_VERSION;
disk_info.date = info->date;
disk_info.expire = info->expire;
disk_info.entity_version = dobj->disk_info.entity_version++;
disk_info.request_time = info->request_time;
disk_info.response_time = info->response_time;
disk_info.status = info->status;
disk_info.name_len = strlen(dobj->name);
iov[0].iov_base = (void*)&disk_info;
iov[0].iov_len = sizeof(disk_cache_info_t);
iov[1].iov_base = dobj->name;
iov[1].iov_len = disk_info.name_len;
rv = apr_file_writev(dobj->hfd, (const struct iovec *) &iov, 2, &amt);
if (rv != APR_SUCCESS) {
return rv;
}
if (r->headers_out) {
apr_table_t *headers_out;
headers_out = ap_cache_cacheable_hdrs_out(r->pool, r->headers_out,
r->server);
if (!apr_table_get(headers_out, "Content-Type")
&& r->content_type) {
apr_table_setn(headers_out, "Content-Type",
ap_make_content_type(r, r->content_type));
}
headers_out = apr_table_overlay(r->pool, headers_out,
r->err_headers_out);
rv = store_table(dobj->hfd, headers_out);
if (rv != APR_SUCCESS) {
return rv;
}
}
/* Parse the vary header and dump those fields from the headers_in. */
/* FIXME: Make call to the same thing cache_select_url calls to crack Vary. */
if (r->headers_in) {
apr_table_t *headers_in;
headers_in = ap_cache_cacheable_hdrs_out(r->pool, r->headers_in,
r->server);
rv = store_table(dobj->hfd, headers_in);
if (rv != APR_SUCCESS) {
return rv;
}
}
apr_file_close(dobj->hfd); /* flush and close */
/* Remove old file with the same name. If remove fails, then
* perhaps we need to create the directory tree where we are
* about to write the new headers file.
*/
rv = apr_file_remove(dobj->hdrsfile, r->pool);
if (rv != APR_SUCCESS) {
mkdir_structure(conf, dobj->hdrsfile, r->pool);
}
rv = apr_file_rename(dobj->tempfile, dobj->hdrsfile, r->pool);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
"disk_cache: rename tempfile to hdrsfile failed: %s -> %s",
dobj->tempfile, dobj->hdrsfile);
return rv;
}
dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"disk_cache: Stored headers for URL %s", dobj->name);
return APR_SUCCESS;
}
/*
 * Append the contents of brigade `bb` to the entity's temporary data file
 * and, when the brigade ends with EOS, finish the entity.
 *
 * The temp file is created on the first call. For each bucket the data is
 * read and written in full; the running size is checked against the
 * configured maximum after every bucket. At EOS the entity is discarded if
 * the connection was aborted or the body is smaller than the configured
 * minimum; otherwise the temp file is moved into place by
 * file_cache_el_final().
 *
 * Returns APR_SUCCESS, the apr_file_mktemp()/apr_bucket_read()/
 * file_cache_el_final() error, or APR_EGENERAL for a write failure, a
 * failed size check or an aborted connection. On every failure after the
 * temp file exists, file_cache_errorcleanup() removes the header, data and
 * temp files.
 */
static apr_status_t store_body(cache_handle_t *h, request_rec *r,
                               apr_bucket_brigade *bb)
{
    apr_bucket *e;
    apr_status_t rv;
    disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
    disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
                                                 &disk_cache_module);

    /* We write to a temp file and then atomically rename the file over
     * in file_cache_el_final().
     */
    if (!dobj->tfd) {
        rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
                             APR_CREATE | APR_WRITE | APR_BINARY |
                             APR_BUFFERED | APR_EXCL, r->pool);
        if (rv != APR_SUCCESS) {
            return rv;
        }
        dobj->file_size = 0;
    }

    for (e = APR_BRIGADE_FIRST(bb);
         e != APR_BRIGADE_SENTINEL(bb);
         e = APR_BUCKET_NEXT(e))
    {
        const char *str;
        apr_size_t length, written;

        /* A failed read leaves str/length unset, so stop right here. */
        rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
        if (rv != APR_SUCCESS) {
            ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
                         "cache_disk: Error when reading bucket for URL %s",
                         h->cache_obj->key);
            /* Remove the intermediate cache file and return non-APR_SUCCESS */
            file_cache_errorcleanup(dobj, r);
            return rv;
        }

        rv = apr_file_write_full(dobj->tfd, str, length, &written);
        if (rv != APR_SUCCESS) {
            ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
                         "cache_disk: Error when writing cache file for URL %s",
                         h->cache_obj->key);
            /* Remove the intermediate cache file and return non-APR_SUCCESS */
            file_cache_errorcleanup(dobj, r);
            return APR_EGENERAL;
        }
        dobj->file_size += written;
        if (dobj->file_size > conf->maxfs) {
            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                         "cache_disk: URL %s failed the size check "
                         "(%" APR_OFF_T_FMT ">%" APR_SIZE_T_FMT ")",
                         h->cache_obj->key, dobj->file_size, conf->maxfs);
            /* Remove the intermediate cache file and return non-APR_SUCCESS */
            file_cache_errorcleanup(dobj, r);
            return APR_EGENERAL;
        }
    }

    /* Was this the final bucket? If yes, close the temp file and perform
     * sanity checks. An empty brigade has no last bucket to inspect.
     */
    if (!APR_BRIGADE_EMPTY(bb) && APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) {
        if (r->connection->aborted) {
            ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
                         "disk_cache: Discarding body for URL %s "
                         "because connection has been aborted.",
                         h->cache_obj->key);
            /* Remove the intermediate cache file and return non-APR_SUCCESS */
            file_cache_errorcleanup(dobj, r);
            return APR_EGENERAL;
        }
        if (dobj->file_size < conf->minfs) {
            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                         "cache_disk: URL %s failed the size check "
                         "(%" APR_OFF_T_FMT "<%" APR_SIZE_T_FMT ")",
                         h->cache_obj->key, dobj->file_size, conf->minfs);
            /* Remove the intermediate cache file and return non-APR_SUCCESS */
            file_cache_errorcleanup(dobj, r);
            return APR_EGENERAL;
        }

        /* All checks were fine. Move tempfile to final destination */
        /* Link to the perm file, and close the descriptor */
        rv = file_cache_el_final(dobj, r);
        if (rv != APR_SUCCESS) {
            /* Headers without a matching body are useless; drop both. */
            file_cache_errorcleanup(dobj, r);
            return rv;
        }
        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                     "disk_cache: Body for URL %s cached.",  dobj->name);
    }

    return APR_SUCCESS;
}
/*
 * Per-server configuration constructor: allocates a zeroed disk_cache_conf
 * from `p` and fills in the compiled-in defaults. No cache root is set, so
 * the provider declines until a CacheRoot directive supplies one. The
 * server record `s` is not consulted.
 */
static void *create_config(apr_pool_t *p, server_rec *s)
{
    disk_cache_conf *cfg = apr_pcalloc(p, sizeof(*cfg));

    /* XXX: Set default values */
    cfg->cache_root = NULL;
    cfg->cache_root_len = 0;

    cfg->dirlevels = DEFAULT_DIRLEVELS;
    cfg->dirlength = DEFAULT_DIRLENGTH;

    cfg->minfs = DEFAULT_MIN_FILE_SIZE;
    cfg->maxfs = DEFAULT_MAX_FILE_SIZE;

    return cfg;
}
/*
* mod_disk_cache configuration directives handlers.
*/
/*
 * CacheRoot handler: remembers the directory argument as given, together
 * with its length. The pointer is stored, not a copy. Always returns NULL
 * (no error).
 */
static const char
*set_cache_root(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
    disk_cache_conf *cfg;

    cfg = ap_get_module_config(parms->server->module_config,
                               &disk_cache_module);

    cfg->cache_root_len = strlen(arg);
    cfg->cache_root = arg;
    /* TODO: canonicalize cache_root and strip off any trailing slashes */

    return NULL;
}
/*
* Consider eliminating the next two directives in favor of
* Ian's prime number hash...
* key = hash_fn( r->uri)
* filename = "/key % prime1 /key %prime2/key %prime3"
*/
/*
 * CacheDirLevels handler.
 *
 * Accepts an integer >= 1 such that levels * CacheDirLength does not exceed
 * CACHEFILE_LEN. Returns an error string for the configuration parser on
 * rejection, NULL on success.
 */
static const char
*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                 &disk_cache_module);
    int val = atoi(arg);

    if (val < 1)
        return "CacheDirLevels value must be an integer greater than 0";
    /* Same test as val * dirlength > CACHEFILE_LEN, written as a division
     * so that a very large argument cannot overflow the multiplication. */
    if (conf->dirlength > 0 && val > CACHEFILE_LEN / conf->dirlength)
        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
    conf->dirlevels = val;
    return NULL;
}
/*
 * CacheDirLength handler.
 *
 * Accepts an integer >= 1 such that length * CacheDirLevels does not exceed
 * CACHEFILE_LEN. Returns an error string for the configuration parser on
 * rejection, NULL on success.
 */
static const char
*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                 &disk_cache_module);
    int val = atoi(arg);

    if (val < 1)
        return "CacheDirLength value must be an integer greater than 0";
    /* Same test as val * dirlevels > CACHEFILE_LEN, written as a division
     * so that a very large argument cannot overflow the multiplication. */
    if (conf->dirlevels > 0 && val > CACHEFILE_LEN / conf->dirlevels)
        return "CacheDirLevels*CacheDirLength value must not be higher than 20";

    conf->dirlength = val;
    return NULL;
}
/*
 * CacheMinFileSize handler: sets the smallest body size that store_body()
 * will keep.
 *
 * A negative argument is rejected with an error string; stored into the
 * unsigned minfs field it would turn into a huge value and silently
 * disable caching. Returns NULL on success.
 */
static const char
*set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                 &disk_cache_module);
    int val = atoi(arg);

    if (val < 0)
        return "CacheMinFileSize value must be a non-negative integer";
    conf->minfs = (apr_size_t) val;
    return NULL;
}
/*
 * CacheMaxFileSize handler: sets the largest body size that store_body()
 * will keep.
 *
 * A negative argument is rejected with an error string; stored into the
 * unsigned maxfs field it would turn into a huge value and silently remove
 * the limit. Returns NULL on success.
 */
static const char
*set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                 &disk_cache_module);
    int val = atoi(arg);

    if (val < 0)
        return "CacheMaxFileSize value must be a non-negative integer";
    conf->maxfs = (apr_size_t) val;
    return NULL;
}
/*
 * Configuration directives of this module. Each entry takes one argument
 * (AP_INIT_TAKE1), is registered with RSRC_CONF, and is dispatched to the
 * set_cache_* handler named in the entry. The {NULL} entry ends the table.
 */
static const command_rec disk_cache_cmds[] =
{
AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
"The directory to store cache files"),
AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
"The number of levels of subdirectories in the cache"),
AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
"The number of characters in subdirectory names"),
AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF,
"The minimum file size to cache a document"),
AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF,
"The maximum file size to cache a document"),
{NULL}
};
/*
 * Callback table handed to mod_cache by disk_cache_register_hook().
 * The initialisers are positional.
 * NOTE(review): the order must match the member order of cache_provider,
 * which is declared outside this file (presumably mod_cache.h) -- confirm
 * before reordering.
 */
static const cache_provider cache_disk_provider =
{
&remove_entity,
&store_headers,
&store_body,
&recall_headers,
&recall_body,
&create_entity,
&open_entity,
&remove_url,
};
/*
 * Hook-registration callback of the module: publishes cache_disk_provider
 * in the cache provider group under the name "disk", version "0".
 */
static void disk_cache_register_hook(apr_pool_t *p)
{
    ap_register_provider(p,
                         CACHE_PROVIDER_GROUP, "disk", "0",
                         &cache_disk_provider);
}
/*
 * Module record. Only a per-server configuration constructor is supplied
 * (no per-directory configuration and no merge functions), plus the
 * directive table and the hook-registration callback.
 */
module AP_MODULE_DECLARE_DATA disk_cache_module = {
STANDARD20_MODULE_STUFF,
NULL, /* create per-directory config structure */
NULL, /* merge per-directory config structures */
create_config, /* create per-server config structure */
NULL, /* merge per-server config structures */
disk_cache_cmds, /* command apr_table_t */
disk_cache_register_hook /* register hooks */
};