mod_disk_cache.c revision a80dd6ffd7a1484e7f45e4665689bdd84fc97153
/* Copyright 2000-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "apr_file_io.h"
#include "apr_strings.h"
#include "mod_cache.h"
#include "ap_provider.h"
#include "util_filter.h"
#include "util_script.h"
#endif
/* Our on-disk header format is:
*
* disk_cache_info_t
* entity name (dobj->name) [length is in disk_cache_info_t->name_len]
* r->headers_out (delimited by CRLF)
* CRLF
* r->headers_in (delimited by CRLF)
* CRLF
*/
#define DISK_FORMAT_VERSION 0
typedef struct {
/* Indicates the format of the header struct stored on-disk. */
int format;
/* The HTTP status code returned for this response. */
int status;
/* The size of the entity name that follows. */
/* The number of times we've cached this entity. */
/* Miscellaneous time values. */
/*
* disk_cache_object_t
* Pointed to by cache_object_t::vobj
*/
typedef struct disk_cache_object {
const char *root; /* the location of the cache directory */
char *tempfile; /* temp file to hold the content */
#if 0
int dirlevels; /* Number of levels of subdirectories */
int dirlength; /* Length of subdirectory names */
#endif
char *datafile; /* name of file where the data will go */
char *hdrsfile; /* name of file where the hdrs will go */
char *hashfile; /* Computed hash key for this URI */
char *name; /* entity name; listed after disk_cache_info_t in the on-disk header format */
/*
* mod_disk_cache configuration
*/
/* TODO: Make defaults OS specific */
#define DEFAULT_DIRLEVELS 3
#define DEFAULT_DIRLENGTH 2
#define DEFAULT_MIN_FILE_SIZE 1
#define DEFAULT_MAX_FILE_SIZE 1000000
#define DEFAULT_CACHE_SIZE 1000000
typedef struct {
const char* cache_root; /* cache directory; presumably set via the CacheRoot directive -- confirm */
double lmfactor; /* factor for estimating expires date */
int dirlevels; /* Number of levels of subdirectories */
int dirlength; /* Length of subdirectory names */
int expirychk; /* true if expiry time is observed for cached files */
/* dgc_time_t gcdt; time of day for daily garbage collection */
int maxgcmem; /* maximum memory used by garbage collection */
/* Forward declarations */
static int remove_entity(cache_handle_t *h);
/*
* Local static functions
*/
#define CACHE_HEADER_SUFFIX ".header"
#define CACHE_DATA_SUFFIX ".data"
{
name);
}
}
{
name);
}
}
{
char *p;
p = strchr(p, '/');
if (!p)
break;
*p = '\0';
/* XXX */
}
*p = '/';
++p;
}
}
request_rec *r)
{
/* move the data over */
/* This assumes that the tempfile is on the same file system as its
* final destination; otherwise a copy would be needed
* rather than a rename.
*/
if (rv != APR_SUCCESS) {
/* XXX log */
}
}
return APR_SUCCESS;
}
{
/* Remove the header file and the body file. */
/* If we opened the temporary data file, close and remove it. */
}
return APR_SUCCESS;
}
/* These two functions get and put state information into the data
* file for an ap_cache_el. This state information is read and
* written transparently to clients of this module.
*/
{
char *urlbuff;
/* read the data from the cache file */
len = sizeof(disk_cache_info_t);
if (rv != APR_SUCCESS) {
return rv;
}
"cache_disk: URL %s had a on-disk version mismatch",
r->uri);
return APR_EGENERAL;
}
/* Store it away so we can get it later. */
/* Note that we could optimize this by conditionally doing the palloc
* depending upon the size. */
if (rv != APR_SUCCESS) {
return rv;
}
/* check that we have the same URL */
/* Would strncmp be correct? */
return APR_EGENERAL;
}
return APR_SUCCESS;
}
/*
* Hook and mod_cache callback functions
*/
#define AP_TEMPFILE "/aptmpXXXXXX"
const char *key,
{
return DECLINED;
}
/* If the Content-Length is still unknown, cache anyway */
"cache_disk: URL %s failed the size check, "
"or is incomplete",
key);
return DECLINED;
}
/* Allocate and initialize cache_object_t and disk_cache_object_t */
/* XXX Bad Temporary Cast - see cache_object_t notes */
return OK;
}
{
static int error_logged = 0;
int flags;
/* Look up entity keyed to 'url' */
if (!error_logged) {
error_logged = 1;
"disk_cache: Cannot cache files to disk without a CacheRoot specified.");
}
return DECLINED;
}
/* Create and init the cache object */
/* Open the data file */
#ifdef APR_SENDFILE_ENABLED
#endif
if (rc != APR_SUCCESS) {
/* XXX: Log message */
return DECLINED;
}
/* Open the headers file */
if (rc != APR_SUCCESS) {
/* XXX: Log message */
return DECLINED;
}
if (rc == APR_SUCCESS) {
}
/* Read the bytes to setup the cache_info fields */
if (rc != APR_SUCCESS) {
/* XXX log message */
return DECLINED;
}
/* Initialize the cache_handle callback functions */
return OK;
}
/*
 * Detach the cached object from a cache handle.
 *
 * h: cache handle whose object pointer is cleared.
 *
 * This function only resets the pointer; it performs no file operations.
 * Always returns OK.
 */
static int remove_entity(cache_handle_t *h)
{
    /* Null out the cache object pointer so next time we start from scratch */
    h->cache_obj = NULL;
    return OK;
}
/*
 * Placeholder for evicting the cache entry stored under `key`.
 *
 * Nothing is deleted yet; the call simply reports success (OK).
 */
static int remove_url(const char *key)
{
    (void)key; /* unused until deletion is implemented */

    /* XXX: Delete file from cache! */
    return OK;
}
{
char w[MAX_STRING_LEN];
char *l;
int p;
while (1) {
/* ### What about APR_EOF? */
if (rv != APR_SUCCESS) {
"Premature end of cache headers.");
return rv;
}
/* Delete terminal (CR?)LF */
p = strlen(w);
/* Indeed, the host's '\n':
'\012' for UNIX; '\015' for MacOS; '\025' for OS/390
-- whatever the script generates.
*/
if (p > 0 && w[p - 1] == '\n') {
w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the headers, break out of the loop. */
if (w[0] == '\0') {
break;
}
/* Chances are that we received an ASCII header text instead of
* the expected EBCDIC header lines. Try to auto-detect:
*/
if (!(l = strchr(w, ':'))) {
int maybeASCII = 0, maybeEBCDIC = 0;
++maybeEBCDIC;
++maybeASCII;
}
if (maybeASCII > maybeEBCDIC) {
"CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
r->filename);
w, &inbytes_left, w, &outbytes_left);
}
}
#endif /*APR_CHARSET_EBCDIC*/
/* if we see a bogus header don't ignore it. Shout and scream */
if (!(l = strchr(w, ':'))) {
return APR_EGENERAL;
}
*l++ = '\0';
while (*l && apr_isspace(*l)) {
++l;
}
apr_table_add(table, w, l);
}
return APR_SUCCESS;
}
/*
* Reads headers from a buffer and returns an array of headers.
* Returns NULL on file error.
* NOTE(review): the function below returns APR status codes
* (APR_NOTFOUND / APR_SUCCESS), not NULL -- confirm and update the line above.
* This routine tries to deal with too long lines and continuation lines.
* @@@: XXX: FIXME: currently the headers are passed through unmerged.
* Is that okay, or should they be collapsed where possible?
*/
{
apr_table_t * tmp;
/* This case should not happen... */
/* XXX log message */
return APR_NOTFOUND;
}
return APR_SUCCESS;
}
{
apr_bucket *e;
bb->bucket_alloc);
return APR_SUCCESS;
}
{
int i;
&amt);
if (rv != APR_SUCCESS) {
return rv;
}
}
}
&amt);
return rv;
}
{
/* This is flaky... we need to manage the cache_info differently */
/* Remove old file with the same name. If remove fails, then
* perhaps we need to create the directory tree where we are
* about to write the new headers file.
*/
if (rv != APR_SUCCESS) {
}
APR_OS_DEFAULT, r->pool);
if (rv != APR_SUCCESS) {
return rv;
}
if (rv != APR_SUCCESS) {
return rv;
}
if (r->headers_out) {
r->server);
r->content_type) {
ap_make_content_type(r, r->content_type));
}
if (rv != APR_SUCCESS) {
return rv;
}
}
/* Parse the vary header and dump those fields from the headers_in. */
/* Make call to the same thing cache_select_url calls to crack Vary. */
/* @@@ Some day, not today. */
if (r->headers_in) {
r->server);
if (rv != APR_SUCCESS) {
return rv;
}
}
}
else {
/* XXX log message */
}
return APR_SUCCESS;
}
{
apr_bucket *e;
/* We write to a temp file and then atomically rename the file over
* in file_cache_el_final().
*/
if (rv != APR_SUCCESS) {
return rv;
}
}
for (e = APR_BRIGADE_FIRST(bb);
e != APR_BRIGADE_SENTINEL(bb);
e = APR_BUCKET_NEXT(e))
{
const char *str;
if (rv != APR_SUCCESS) {
"cache_disk: Error when writing cache file for URL %s",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
"cache_disk: URL %s failed the size check (%lu>%lu)",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
}
/* Was this the final bucket? If yes, close the temp file and perform
* sanity checks.
*/
/* If the target value of the content length is unknown
* (h->cache_obj->info.len <= 0), check if connection has been
* aborted by client to avoid caching incomplete request bodies.
*
* This can happen with large responses from slow backends like
* Tomcat via mod_jk.
*/
if (r->connection->aborted) {
"disk_cache: Discarding body for URL %s "
"because connection has been aborted.",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
/* XXX Fixme: file_size isn't constrained by size_t. */
}
/* "Content-Length" and actual content disagree in size. Log that. */
"disk_cache: URL %s failed the size check (%lu != %lu)",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
"cache_disk: URL %s failed the size check (%lu<%lu)",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
/* All checks were fine. Move tempfile to final destination */
/* Link to the perm file, and close the descriptor */
file_cache_el_final(dobj, r);
}
return APR_SUCCESS;
}
{
/* XXX: Set default values */
conf->cache_root_len = 0;
return conf;
}
/*
* mod_disk_cache configuration directives handlers.
*/
static const char
{
/* TODO: canonicalize cache_root and strip off any trailing slashes */
return NULL;
}
static const char
{
return NULL;
}
static const char
{
/*
disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
&disk_cache_module);
*/
/* XXX */
return NULL;
}
/*
* Consider eliminating the next two directives in favor of
* Ian's prime number hash...
* key = hash_fn( r->uri)
*/
static const char
{
if (val < 1)
return "CacheDirLevels value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
if (val < 1)
return "CacheDirLength value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
return NULL;
}
static const char
{
return NULL;
}
static const char
{
return NULL;
}
static const char
{
/* XXX
disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
&disk_cache_module);
*/
return NULL;
}
static const char
{
/* XXX
disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
&disk_cache_module);
*/
return NULL;
}
static const char
{
/* XXX
disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
&disk_cache_module);
*/
return NULL;
}
static const char
{
/* XXX
disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
&disk_cache_module);
*/
return NULL;
}
static const char
{
/* XXX
disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
&disk_cache_module);
*/
return NULL;
}
static const command_rec disk_cache_cmds[] =
{
"The directory to store cache files"),
"The maximum disk space used by the cache in KB"),
"The interval between garbage collections, in hours"),
"The number of levels of subdirectories in the cache"),
"The number of characters in subdirectory names"),
"on if cache observes Expires date when seeking files"),
"The minimum file size to cache a document"),
"The maximum file size to cache a document"),
"The minimum time margin to cache a document"),
"The time of day for garbage collection (24 hour clock)"),
"The time in hours to retain unused file that match a url"),
"The time in hours to retain unchanged files that match a url"),
"The maximum kilobytes of memory used for garbage collection"),
{NULL}
};
static const cache_provider cache_disk_provider =
{
};
/*
* Hook-registration callback; it is listed as the "register hooks" entry of
* the module record at the end of this file.
* NOTE(review): the body below contains no statements, so nothing (including
* cache_disk_provider) is registered here as shown -- confirm this is intended.
*/
static void disk_cache_register_hook(apr_pool_t *p)
{
/* cache initializer */
}
NULL, /* create per-directory config structure */
NULL, /* merge per-directory config structures */
create_config, /* create per-server config structure */
NULL, /* merge per-server config structures */
disk_cache_cmds, /* command apr_table_t */
disk_cache_register_hook /* register hooks */
};