mod_disk_cache.c revision 722ed80e405b9d451cc89465ac2b0568f143a7a2
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "apr_file_io.h"
#include "apr_strings.h"
#include "mod_cache.h"
#include "mod_disk_cache.h"
#include "ap_provider.h"
#include "util_filter.h"
#include "util_script.h"
#include "util_charset.h"
/*
* mod_disk_cache: Disk Based HTTP 1.1 Cache.
*
* Flow to Find the .data file:
* Open <hash>.header
* Read in <hash>.header file (may contain Format #1 or Format #2)
* If format #1 (Contains a list of Vary Headers):
* Use each header name (from .header) with our request values (headers_in) to
* re-read in <hash>.header (must be format #2)
* read in <hash>.data
*
* Always first in the header file:
* disk_cache_format_t format;
*
* VARY_FORMAT_VERSION:
* apr_time_t expire;
* apr_array_t vary_headers (delimited by CRLF)
*
* DISK_FORMAT_VERSION:
* disk_cache_info_t
* entity name (dobj->name) [length is in disk_cache_info_t->name_len]
* r->headers_out (delimited by CRLF)
* CRLF
* r->headers_in (delimited by CRLF)
* CRLF
*/
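
/*
 * Illustrative sketch (not part of this module): reading back the leading
 * format tag described above and dispatching on it.  The function name and
 * parameters here are invented for the example; the real open/read logic
 * lives in the static functions further down in this file.
 */
static apr_status_t example_read_format(apr_file_t *fd,
                                        disk_cache_format_t *format)
{
    apr_size_t nbytes = sizeof(*format);
    apr_status_t rv = apr_file_read_full(fd, format, nbytes, &nbytes);

    if (rv != APR_SUCCESS) {
        return rv;
    }

    if (*format == VARY_FORMAT_VERSION) {
        /* Format #1: an apr_time_t expiry follows, then the CRLF-delimited
         * vary header array.  The caller re-keys on those headers and
         * re-opens the resulting <hash>.header, which must be format #2. */
    }
    else if (*format == DISK_FORMAT_VERSION) {
        /* Format #2: disk_cache_info_t, the entity name, then headers_out
         * and headers_in, each CRLF-delimited and CRLF-terminated. */
    }

    return APR_SUCCESS;
}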
/* Forward declarations */
static int remove_entity(cache_handle_t *h);
static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
                               apr_table_t *table, apr_file_t *file);
/*
* Local static functions
*/
{
}
}
else {
}
}
{
}
}
else {
}
}
{
char *p;
p = strchr(p, '/');
if (!p)
break;
*p = '\0';
*p = '/';
break;
}
++p;
}
return rv;
}
return APR_SUCCESS;
}
/* htcacheclean may remove directories underneath us.
* So, we'll try renaming three times at a cost of 0.002 seconds.
*/
{
if (rv != APR_SUCCESS) {
int i;
/* 1000 micro-seconds aka 0.001 seconds. */
apr_sleep(1000);
}
}
return rv;
}
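
/*
 * Illustrative sketch (not part of this module) of the retry pattern the
 * comment above describes: if a rename fails because htcacheclean removed a
 * parent directory, sleep briefly, recreate the missing directories and try
 * again.  The function name is invented for the example and the directory
 * permissions are an assumption.
 */
static apr_status_t example_rename_with_retry(const char *src,
                                              const char *dest,
                                              apr_pool_t *pool)
{
    apr_status_t rv = apr_file_rename(src, dest, pool);
    int i;

    for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
        /* 1000 micro-seconds aka 0.001 seconds. */
        apr_sleep(1000);

        /* Recreate any missing parent directories, then retry the rename. */
        rv = apr_dir_make_recursive(ap_make_dirstr_parent(pool, dest),
                                    APR_UREAD | APR_UWRITE | APR_UEXECUTE,
                                    pool);
        if (rv != APR_SUCCESS) {
            continue;
        }
        rv = apr_file_rename(src, dest, pool);
    }

    return rv;
}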
{
/* Remove the header file and the body file. */
/* If we opened the temporary data file, close and remove it. */
}
return APR_SUCCESS;
}
{
int i, k;
int nvec;
const char *header;
const char **elts;
    /* TODO:
     * - Handle multiple-value headers better. (sort them?)
     * - Handle case-insensitive values better.
     *   This isn't the end of the world, since it just lowers the cache
     *   hit rate, but it would be nice to fix.
     *
     *   The majority are case-insensitive if they are values (encoding etc).
     *   Most of rfc2616 is case-insensitive on header contents.
     *
     *   So the better solution may be to identify headers which should be
     *   treated as case-sensitive?
     *    HTTP URIs (3.2.3) [host and scheme are insensitive]
     *    HTTP method (5.1.1)
     *    HTTP-date values (3.3.1)
     *    3.7 Media Types [excerpt]
     *       The type, subtype, and parameter attribute names are case-
     *       insensitive. Parameter values might or might not be case-sensitive,
     *       depending on the semantics of the parameter name.
     *    14.20 Expect [excerpt]
     *       Comparison of expectation values is case-insensitive for unquoted
     *       tokens (including the 100-continue token), and is case-sensitive for
     *       quoted-string expectation-extensions.
     */
if (!header) {
header = "";
}
k++;
k++;
}
k++;
}
{
}
{
char *token;
}
/* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
sizeof(char *), array_alphasort);
}
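
/*
 * Illustrative sketch (not part of this module): turning a Vary header value
 * into a sorted array, which is what makes "Vary: A, B" and "Vary: B, A"
 * produce the same on-disk key.  The comparator and function names are
 * invented for the example; the module's own helpers do the real work.
 */
static int example_alphasort(const void *a, const void *b)
{
    return strcmp(*(char * const *) a, *(char * const *) b);
}

static apr_array_header_t *example_vary_to_array(apr_pool_t *p,
                                                 const char *varyhdr)
{
    apr_array_header_t *arr = apr_array_make(p, 6, sizeof(char *));
    char *token;

    /* ap_get_list_item() consumes one comma-separated token per call. */
    while ((token = ap_get_list_item(p, &varyhdr)) != NULL) {
        *((const char **) apr_array_push(arr)) = token;
    }

    qsort((void *) arr->elts, arr->nelts, sizeof(char *), example_alphasort);
    return arr;
}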
/*
* Hook and mod_cache callback functions
*/
{
return DECLINED;
}
    /* Note: len is -1 if unknown, so don't trust it too hard */
"disk_cache: URL %s failed the size check "
return DECLINED;
}
"disk_cache: URL %s failed the size check "
return DECLINED;
}
/* Allocate and initialize cache_object_t and disk_cache_object_t */
/* Save the cache root */
return OK;
}
{
done = 0;
while(1) {
if (rc == APR_SUCCESS) {
break;
}
if(!APR_STATUS_IS_EOF(rc)) {
return rc;
}
if(rc != APR_SUCCESS) {
return rc;
}
return APR_ETIMEDOUT;
}
}
return APR_SUCCESS;
}
{
int flags;
if (rc != APR_SUCCESS) {
return CACHE_EDECLINED;
}
/* read the format from the cache file */
if(APR_STATUS_IS_EOF(rc)) {
return CACHE_ENODATA;
}
else if(rc != APR_SUCCESS) {
return rc;
}
/* Vary-files are being written to tmpfile and moved in place, so
       they should always be complete */
if (format == VARY_FORMAT_VERSION) {
if(rc != APR_SUCCESS) {
return rc;
}
if (expire < r->request_time) {
return CACHE_EDECLINED;
}
if (rc != APR_SUCCESS) {
"disk_cache: Cannot parse vary header file: %s",
return CACHE_EDECLINED;
}
if (rc != APR_SUCCESS) {
return CACHE_EDECLINED;
}
if(APR_STATUS_IS_EOF(rc)) {
return CACHE_ENODATA;
}
else if(rc != APR_SUCCESS) {
return rc;
}
}
if(format != DISK_FORMAT_VERSION) {
"disk_cache: File '%s' had a version mismatch. File had "
return CACHE_EDECLINED;
}
/* read the data from the header file */
if(APR_STATUS_IS_EOF(rc)) {
return CACHE_ENODATA;
}
else if(rc != APR_SUCCESS) {
return rc;
}
/* Store it away so we can get it later. */
return APR_SUCCESS;
}
{
while(1) {
}
if(rc != CACHE_EDECLINED) {
"disk_cache: Cannot load header file: %s",
}
return rc;
}
/* Objects with unknown body size will have file_size == -1 until the
entire body is written and the header updated with the actual size.
       Since we depend on knowing the body size, we wait here until the
       size has been written. */
break;
}
if(rc != APR_SUCCESS) {
return rc;
}
"disk_cache: Timed out waiting for header for URL %s"
" - caching the body failed?", key);
return CACHE_EDECLINED;
}
}
return APR_SUCCESS;
}
{
&core_module);
int flags;
#if APR_HAS_SENDFILE
? 0 : APR_SENDFILE_ENABLED);
#endif
    /* Wait here until we get a body cachefile with data in it, and can do a
     * quick sanity check */
while(1) {
if(rc != APR_SUCCESS) {
"disk_cache: Timed out waiting for body for "
"URL %s - caching failed?", key);
return CACHE_EDECLINED;
}
continue;
}
}
if(rc != APR_SUCCESS) {
return rc;
}
"disk_cache: Bad cached body for URL %s, size %"
return CACHE_EDECLINED;
}
/* Still caching or failed? */
if(rc != APR_SUCCESS ||
{
"disk_cache: Body for URL %s is too small - "
return CACHE_EDECLINED;
}
}
break;
}
}
/* Go back to the beginning */
off = 0;
if(rc != APR_SUCCESS) {
return rc;
}
return APR_SUCCESS;
}
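
/*
 * Illustrative sketch (not part of this module): opening a cached body file
 * with APR_SENDFILE_ENABLED only when the core EnableSendfile setting allows
 * it, mirroring the #if APR_HAS_SENDFILE fragments above and below.  The
 * function name is invented for the example.
 */
static apr_status_t example_open_body_fd(apr_file_t **fd, const char *path,
                                         request_rec *r)
{
    core_dir_config *coreconf = ap_get_module_config(r->per_dir_config,
                                                     &core_module);
    apr_int32_t flags = APR_READ | APR_BINARY;

#if APR_HAS_SENDFILE
    /* Only ask for sendfile when the admin has not disabled it here. */
    flags |= ((coreconf->enable_sendfile == ENABLE_SENDFILE_OFF)
              ? 0 : APR_SENDFILE_ENABLED);
#endif

    return apr_file_open(fd, path, flags, 0, r->pool);
}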
{
static int error_logged = 0;
char urlbuff[MAX_STRING_LEN];
/* Look up entity keyed to 'url' */
if (!error_logged) {
error_logged = 1;
"disk_cache: Cannot cache files to disk without a "
"CacheRoot specified.");
}
return DECLINED;
}
/* Create and init the cache object */
/* Save the cache root */
    /* Open the header and read basic info; wait until the header contains
       valid size information for the body */
if(rc != APR_SUCCESS) {
return DECLINED;
}
if(len > 0) {
if (rc == APR_ETIMEDOUT) {
"disk_cache: Timed out waiting for urlbuff for "
"URL %s - caching failed?", key);
return DECLINED;
}
else if(rc != APR_SUCCESS) {
"disk_cache: Error reading urlbuff for URL %s",
key);
return DECLINED;
}
}
/* check that we have the same URL */
"disk_cache: Cached URL %s didn't match requested "
return DECLINED;
}
/* Only need body cachefile if we have a body */
if(dobj->initial_size > 0) {
if(rc != APR_SUCCESS) {
return DECLINED;
}
}
else {
}
return OK;
}
static int remove_entity(cache_handle_t *h)
{
/* Null out the cache object pointer so next time we start from scratch */
return OK;
}
{
/* Get disk cache object from cache handle */
if (!dobj) {
return DECLINED;
}
/* Delete headers file */
        /* Will only produce output if httpd is started with -e debug.
         * For the reason, see log_error_core for the case s == NULL.
         */
"disk_cache: Failed to delete headers file %s from cache.",
return DECLINED;
}
}
/* Delete data file */
        /* Will only produce output if httpd is started with -e debug.
         * For the reason, see log_error_core for the case s == NULL.
         */
"disk_cache: Failed to delete data file %s from cache.",
return DECLINED;
}
}
/* now delete directories as far as possible up to our cache root */
const char *str_to_copy;
if (str_to_copy) {
/* remove filename */
*slash = '\0';
/*
* now walk our way back to the cache root, delete everything
* in the way as far as possible
*
* Note: due to the way we constructed the file names in
* header_file and data_file, we are guaranteed that the
* cache_root is suffixed by at least one '/' which will be
* turned into a terminating null by this loop. Therefore,
* we won't either delete or go above our cache root.
*/
"disk_cache: Deleting directory %s from cache",
dir);
break;
}
*slash = '\0';
}
}
}
return OK;
}
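
/*
 * Illustrative sketch (not part of this module) of the directory walk
 * described in the comment above: chop the deleted file's path back one
 * component at a time and apr_dir_remove() each level, stopping at the first
 * failure (typically a non-empty directory) and never touching the cache
 * root itself.  The names and the length-based guard are invented for the
 * example.
 */
static void example_prune_dirs(char *path, apr_size_t cache_root_len,
                               apr_pool_t *pool)
{
    char *slash;

    /* 'path' starts out as the full name of an already-deleted cache file. */
    while ((slash = strrchr(path, '/')) != NULL
           && (apr_size_t)(slash - path) > cache_root_len) {
        *slash = '\0';                     /* drop the last path component */
        if (apr_dir_remove(path, pool) != APR_SUCCESS) {
            break;                         /* not empty, or already gone */
        }
    }
}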
{
char w[MAX_STRING_LEN];
int p;
while (1) {
if (rv != APR_SUCCESS) {
"Premature end of vary array.");
return rv;
}
p = strlen(w);
        if (p > 0 && w[p - 1] == '\n') {
            if (p > 1 && w[p - 2] == CR) {
                w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the array, break out of the loop. */
if (w[0] == '\0') {
break;
}
}
return APR_SUCCESS;
}
{
int i;
const char **elts;
&amt);
if (rv != APR_SUCCESS) {
return rv;
}
}
&amt);
}
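
/*
 * Illustrative sketch (not part of this module): writing an array of strings
 * in the CRLF-delimited form used by the header files, with a lone CRLF
 * terminating the array so the reader above knows where to stop.  The
 * function name is invented for the example.
 */
static apr_status_t example_store_string_array(apr_file_t *fd,
                                               apr_array_header_t *arr)
{
    int i;
    apr_status_t rv;
    apr_size_t amt;
    struct iovec iov[2];
    const char **elts = (const char **) arr->elts;

    for (i = 0; i < arr->nelts; i++) {
        iov[0].iov_base = (char *) elts[i];
        iov[0].iov_len = strlen(elts[i]);
        iov[1].iov_base = CRLF;
        iov[1].iov_len = sizeof(CRLF) - 1;

        rv = apr_file_writev(fd, iov, 2, &amt);
        if (rv != APR_SUCCESS) {
            return rv;
        }
    }

    /* An empty line marks the end of the array. */
    iov[0].iov_base = CRLF;
    iov[0].iov_len = sizeof(CRLF) - 1;
    return apr_file_writev(fd, iov, 1, &amt);
}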
{
char w[MAX_STRING_LEN];
char *l;
int p;
while (1) {
/* ### What about APR_EOF? */
if (rv != APR_SUCCESS) {
return rv;
}
/* Delete terminal (CR?)LF */
p = strlen(w);
/* Indeed, the host's '\n':
'\012' for UNIX; '\015' for MacOS; '\025' for OS/390
-- whatever the script generates.
*/
        if (p > 0 && w[p - 1] == '\n') {
            if (p > 1 && w[p - 2] == CR) {
                w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the headers, break out of the loop. */
if (w[0] == '\0') {
break;
}
/* Chances are that we received an ASCII header text instead of
* the expected EBCDIC header lines. Try to auto-detect:
*/
if (!(l = strchr(w, ':'))) {
int maybeASCII = 0, maybeEBCDIC = 0;
++maybeEBCDIC;
++maybeASCII;
}
if (maybeASCII > maybeEBCDIC) {
"disk_cache: CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
r->filename);
w, &inbytes_left, w, &outbytes_left);
}
}
#endif /*APR_CHARSET_EBCDIC*/
/* if we see a bogus header don't ignore it. Shout and scream */
if (!(l = strchr(w, ':'))) {
return APR_EGENERAL;
}
*l++ = '\0';
while (*l && apr_isspace(*l)) {
++l;
}
apr_table_add(table, w, l);
}
return APR_SUCCESS;
}
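
/*
 * Illustrative sketch (not part of this module): splitting one "Name: value"
 * line of the kind the loop above parses, and adding it to a table.  The
 * function name is invented for the example.
 */
static apr_status_t example_add_header_line(apr_table_t *table, char *line)
{
    char *value = strchr(line, ':');

    if (value == NULL) {
        /* A header line without a colon is bogus; don't ignore it. */
        return APR_EGENERAL;
    }

    *value++ = '\0';
    while (*value && apr_isspace(*value)) {
        ++value;
    }

    apr_table_add(table, line, value);
    return APR_SUCCESS;
}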
{
off = 0;
if(rv != APR_SUCCESS) {
return rv;
}
while(1) {
if(rv == APR_SUCCESS) {
break;
}
if(rv != APR_SUCCESS) {
return rv;
}
if(rv != APR_SUCCESS ||
{
"disk_cache: Timed out waiting for cache headers "
return APR_EGENERAL;
}
}
return APR_SUCCESS;
}
/*
* Reads headers from a buffer and returns an array of headers.
* Returns NULL on file error
* This routine tries to deal with too long lines and continuation lines.
* @@@: XXX: FIXME: currently the headers are passed thru un-merged.
* Is that okay, or should they be collapsed where possible?
*/
{
/* This case should not happen... */
/* XXX log message */
return APR_NOTFOUND;
}
if(rv != APR_SUCCESS) {
"disk_cache: Timed out waiting for response headers "
return rv;
}
if(rv != APR_SUCCESS) {
"disk_cache: Timed out waiting for request headers "
return rv;
}
return APR_SUCCESS;
}
{
apr_bucket *e;
return APR_SUCCESS;
}
{
int i;
&amt);
if (rv != APR_SUCCESS) {
return rv;
}
}
}
&amt);
return rv;
}
{
while(1) {
/* FIXME: Debug */
"disk_cache: open_new_file: Opening %s", filename);
if(APR_STATUS_IS_EEXIST(rv)) {
if(APR_STATUS_IS_ENOENT(rv)) {
/* Someone else has already removed it, try again */
continue;
}
else if(rv != APR_SUCCESS) {
return rv;
}
/* Something stale that's left around */
"disk_cache: open_new_file: Failed to "
"remove old %s", filename);
return rv;
}
continue;
}
else {
/* Someone else has just created the file, return identifiable
status so calling function can do the right thing */
return CACHE_EEXIST;
}
}
else if(APR_STATUS_IS_ENOENT(rv)) {
/* The directory for the file didn't exist */
if(rv != APR_SUCCESS) {
"disk_cache: open_new_file: Failed to make "
"directory for %s", filename);
return rv;
}
continue;
}
else if(rv == APR_SUCCESS) {
return APR_SUCCESS;
}
else {
"disk_cache: open_new_file: Failed to open %s",
filename);
return rv;
}
}
    /* We should never get here, so treat it as an error */
return APR_EGENERAL;
}
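
/*
 * Illustrative sketch (not part of this module) of the create-or-recover
 * loop used above: open with APR_EXCL, treat EEXIST as "someone else is
 * caching this (or left stale debris)", and on ENOENT create the missing
 * directories and retry.  The function name is invented for the example and
 * the handling of EEXIST is deliberately simplified; the code above stats
 * the existing file and either removes stale debris and retries, or backs
 * off and lets the other writer finish.
 */
static apr_status_t example_open_exclusive(apr_file_t **fd,
                                           const char *filename,
                                           apr_pool_t *pool)
{
    while (1) {
        apr_status_t rv = apr_file_open(fd, filename,
                                        APR_WRITE | APR_CREATE | APR_EXCL
                                        | APR_BUFFERED | APR_BINARY,
                                        APR_OS_DEFAULT, pool);

        if (rv == APR_SUCCESS) {
            return APR_SUCCESS;
        }

        if (APR_STATUS_IS_EEXIST(rv)) {
            /* Return an identifiable status so the caller can decide what
             * to do about the existing file. */
            return CACHE_EEXIST;
        }

        if (APR_STATUS_IS_ENOENT(rv)) {
            /* Parent directory is missing: create it and try again. */
            rv = apr_dir_make_recursive(ap_make_dirstr_parent(pool, filename),
                                        APR_OS_DEFAULT, pool);
            if (rv != APR_SUCCESS) {
                return rv;
            }
            continue;
        }

        return rv;
    }
}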
const char *varyhdr)
{
const char *vfile;
int flags;
}
else {
}
if (rv != APR_SUCCESS) {
return rv;
}
if (rv != APR_SUCCESS) {
return rv;
}
if (rv != APR_SUCCESS) {
return rv;
}
if (rv != APR_SUCCESS) {
return rv;
}
if (rv != APR_SUCCESS) {
"disk_cache: rename tempfile to varyfile failed: "
return rv;
}
}
return APR_SUCCESS;
}
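
/*
 * Illustrative sketch (not part of this module) of the "write to a tempfile,
 * then move it into place" pattern noted above, which keeps readers from
 * ever seeing a partially written vary/header file.  The names, flags and
 * tempfile template are invented for the example.
 */
static apr_status_t example_atomic_store(const char *final_name,
                                         const char *buf, apr_size_t len,
                                         apr_pool_t *pool)
{
    apr_file_t *tmpfd;
    char *tempfile = apr_pstrcat(pool,
                                 ap_make_dirstr_parent(pool, final_name),
                                 "aptmpXXXXXX", NULL);
    apr_status_t rv = apr_file_mktemp(&tmpfd, tempfile,
                                      APR_CREATE | APR_WRITE | APR_EXCL
                                      | APR_BUFFERED | APR_BINARY, pool);
    if (rv != APR_SUCCESS) {
        return rv;
    }

    rv = apr_file_write_full(tmpfd, buf, len, NULL);
    if (rv == APR_SUCCESS) {
        rv = apr_file_close(tmpfd);
    }
    else {
        apr_file_close(tmpfd);
    }
    if (rv != APR_SUCCESS) {
        apr_file_remove(tempfile, pool);
        return rv;
    }

    /* The rename is atomic, so a concurrent reader sees either the old file
     * or the complete new one, never a torn write. */
    return apr_file_rename(tempfile, final_name, pool);
}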
{
int niov;
niov = 0;
if (rv != APR_SUCCESS) {
return rv;
}
if (r->headers_out) {
r->server);
&& r->content_type) {
ap_make_content_type(r, r->content_type));
}
r->err_headers_out);
if (rv != APR_SUCCESS) {
return rv;
}
}
/* Parse the vary header and dump those fields from the headers_in. */
/* FIXME: Make call to the same thing cache_select calls to crack Vary. */
if (r->headers_in) {
r->server);
if (rv != APR_SUCCESS) {
return rv;
}
}
return APR_SUCCESS;
}
{
/* This is flaky... we need to manage the cache_info differently */
}
else {
}
if (r->headers_out) {
const char *tmp;
if (tmp) {
if(rv != APR_SUCCESS) {
return rv;
}
}
}
if(rewriting) {
/* Assume we are just rewriting the header if we have an fd. The
           fd might be read-only though; in that case reopen it for writing.
Something equivalent to fdopen would have been handy. */
if (rv != APR_SUCCESS) {
return rv;
}
}
else {
/* We can write here, so let's just move to the right place */
if (rv != APR_SUCCESS) {
return rv;
}
}
}
else {
if(rv == CACHE_EEXIST) {
}
else if(rv != APR_SUCCESS) {
return rv;
}
}
"disk_cache: Skipping store for URL %s: Someone else "
return APR_SUCCESS;
}
if(rv != APR_SUCCESS) {
return rv;
}
return APR_SUCCESS;
}
{
if (!buf) {
return APR_ENOMEM;
}
if(srcoff != 0) {
if(rc != APR_SUCCESS) {
return rc;
}
}
if(destoff != 0) {
if(rc != APR_SUCCESS) {
return rc;
}
}
/* Tried doing this with mmap, but sendfile on Linux got confused when
sending a file while it was being written to from an mmapped area.
The traditional way seems to be good enough, and less complex.
*/
while(len > 0) {
if(rc != APR_SUCCESS) {
return rc;
}
if(rc != APR_SUCCESS) {
return rc;
}
}
    /* Check if the file has changed during copying. This is not 100%
       foolproof due to attribute caching when on NFS etc. */
/* FIXME: Can we assume that we're always copying an entire file? In that
case we can check if the current filesize matches the length
we think it is */
if(rc != APR_SUCCESS) {
return rc;
}
return APR_EGENERAL;
}
return APR_SUCCESS;
}
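
/*
 * Illustrative sketch (not part of this module) of the plain read/write copy
 * loop described above, followed by a "did the source change while we copied
 * it?" check.  The buffer size, the mtime comparison and the names are
 * invented for the example.
 */
static apr_status_t example_copy_body(apr_pool_t *p, apr_file_t *srcfd,
                                      apr_file_t *destfd, apr_off_t len,
                                      apr_time_t orig_mtime)
{
    enum { EXAMPLE_BUF_SIZE = 65536 };
    char *buf = apr_palloc(p, EXAMPLE_BUF_SIZE);
    apr_finfo_t finfo;
    apr_status_t rc;

    while (len > 0) {
        apr_size_t size = (len > EXAMPLE_BUF_SIZE) ? EXAMPLE_BUF_SIZE
                                                   : (apr_size_t) len;

        rc = apr_file_read_full(srcfd, buf, size, NULL);
        if (rc != APR_SUCCESS) {
            return rc;
        }
        rc = apr_file_write_full(destfd, buf, size, NULL);
        if (rc != APR_SUCCESS) {
            return rc;
        }
        len -= size;
    }

    /* Not 100% foolproof (NFS attribute caching etc.), but catches the
     * common case of the source being modified while we copied it. */
    rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, srcfd);
    if (rc != APR_SUCCESS) {
        return rc;
    }
    return (finfo.mtime == orig_mtime) ? APR_SUCCESS : APR_EGENERAL;
}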
request_rec *r,
{
int flags;
apr_bucket *e;
&core_module);
#if APR_HAS_SENDFILE
? 0 : APR_SENDFILE_ENABLED);
#endif
if (rv != APR_SUCCESS) {
"disk_cache: Error opening datafile %s for URL %s",
return rv;
}
/* First, empty the brigade */
e = APR_BRIGADE_FIRST(bb);
while (e != APR_BRIGADE_SENTINEL(bb)) {
apr_bucket *d;
d = e;
e = APR_BUCKET_NEXT(e);
}
/* Then, populate it with our cached instance */
if (rv != APR_SUCCESS) {
return rv;
}
return APR_SUCCESS;
}
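
/*
 * Illustrative sketch (not part of this module): handing a cached body back
 * as a brigade containing one file bucket followed by EOS, which is what
 * lets the core output filter use sendfile on it.  The function name is
 * invented for the example.
 */
static apr_status_t example_body_to_brigade(apr_bucket_brigade *bb,
                                            apr_file_t *fd, apr_off_t len,
                                            apr_pool_t *p)
{
    apr_bucket *e;

    /* One bucket referencing the whole cached file... */
    e = apr_bucket_file_create(fd, 0, (apr_size_t) len, p, bb->bucket_alloc);
    APR_BRIGADE_INSERT_TAIL(bb, e);

    /* ...and an EOS bucket to mark the end of the response. */
    e = apr_bucket_eos_create(bb->bucket_alloc);
    APR_BRIGADE_INSERT_TAIL(bb, e);

    return APR_SUCCESS;
}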
{
apr_bucket *e;
if(r->no_cache) {
"disk_cache: store_body called for URL %s even though"
return APR_EGENERAL;
}
if(dobj->initial_size == 0) {
/* Don't waste a body cachefile on a 0 length body */
return APR_SUCCESS;
}
/* Someone else beat us to storing this object */
/* FIXME: Read-while-caching here */
return APR_SUCCESS;
}
if(rv == CACHE_EEXIST) {
/* Someone else beat us to storing this */
/* FIXME: Read-while-caching here later on */
return APR_SUCCESS;
}
else if(rv != APR_SUCCESS) {
return rv;
}
}
    /* Check if this is a complete single sequential file, eligible for
* file copy.
*/
{
apr_bucket_file *a;
for (e = APR_BRIGADE_FIRST(bb);
e != APR_BRIGADE_SENTINEL(bb);
e = APR_BUCKET_NEXT(e))
{
if(APR_BUCKET_IS_EOS(e)) {
break;
}
if(!APR_BUCKET_IS_FILE(e)) {
break;
}
a = e->data;
if(begin < 0) {
}
break;
}
}
if(copy_file) {
}
}
if(copy_file) {
apr_bucket_file *a;
"disk_cache: Copying body for URL %s, len %"
e = APR_BRIGADE_FIRST(bb);
a = e->data;
if(rv != APR_SUCCESS) {
"disk_cache: Copying body failed, "
return rv;
}
}
else {
for (e = APR_BRIGADE_FIRST(bb);
e != APR_BRIGADE_SENTINEL(bb);
e = APR_BUCKET_NEXT(e))
{
const char *str;
/* Ignore the non-data-buckets */
if(APR_BUCKET_IS_METADATA(e)) {
continue;
}
if (rv != APR_SUCCESS) {
"disk_cache: Error when reading bucket for URL %s",
return rv;
}
if (rv != APR_SUCCESS) {
"disk_cache: Error when writing cache file for "
return rv;
}
"disk_cache: URL %s failed the size check "
return APR_EGENERAL;
}
}
}
/* Drop out here if this wasn't the end */
return APR_SUCCESS;
}
if(!copy_file) {
"disk_cache: Done caching URL %s, len %" APR_OFF_T_FMT,
/* FIXME: Do we really need to check r->no_cache here since we checked
it in the beginning? */
"disk_cache: Discarding body for URL %s "
"because connection has been aborted.",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
"disk_cache: URL %s failed the size check "
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
if(dobj->initial_size < 0) {
/* Update header information now that we know the size */
if(rv != APR_SUCCESS) {
return rv;
}
}
"disk_cache: URL %s - body size mismatch: suggested %"
return APR_EGENERAL;
}
}
/* All checks were fine, close output file */
if(rv != APR_SUCCESS) {
return rv;
}
/* Redirect to cachefile if we copied a plain file */
if(copy_file) {
if(rv != APR_SUCCESS) {
return rv;
}
}
return APR_SUCCESS;
}
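
/*
 * Illustrative sketch (not part of this module) of the non-copy_file path in
 * store_body() above: walk the brigade, skip metadata buckets, read each
 * data bucket and append it to the open cache file while keeping a running
 * byte count for the later size checks.  The function name is invented for
 * the example.
 */
static apr_status_t example_write_brigade(apr_file_t *fd,
                                          apr_bucket_brigade *bb,
                                          apr_off_t *written)
{
    apr_bucket *e;
    apr_status_t rv;

    for (e = APR_BRIGADE_FIRST(bb);
         e != APR_BRIGADE_SENTINEL(bb);
         e = APR_BUCKET_NEXT(e))
    {
        const char *str;
        apr_size_t length;

        /* EOS, FLUSH and other metadata buckets carry no body data. */
        if (APR_BUCKET_IS_METADATA(e)) {
            continue;
        }

        rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
        if (rv != APR_SUCCESS) {
            return rv;
        }

        rv = apr_file_write_full(fd, str, length, NULL);
        if (rv != APR_SUCCESS) {
            return rv;
        }

        *written += length;
    }

    return APR_SUCCESS;
}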
{
/* XXX: Set default values */
conf->cache_root_len = 0;
return conf;
}
/*
* mod_disk_cache configuration directives handlers.
*/
static const char
{
/* TODO: canonicalize cache_root and strip off any trailing slashes */
return NULL;
}
/*
* Consider eliminating the next two directives in favor of
* Ian's prime number hash...
* key = hash_fn( r->uri)
*/
static const char
{
if (val < 1)
return "CacheDirLevels value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
if (val < 1)
return "CacheDirLength value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
{
return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
}
return NULL;
}
static const char
{
{
return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
}
return NULL;
}
static const char
{
{
return "CacheUpdateTimeout argument must be a non-negative integer representing the timeout in milliseconds for cache update operations";
}
return NULL;
}
static const command_rec disk_cache_cmds[] =
{
"The directory to store cache files"),
"The number of levels of subdirectories in the cache"),
"The number of characters in subdirectory names"),
"The minimum file size to cache a document"),
"The maximum file size to cache a document"),
"Timeout in ms for cache updates"),
{NULL}
};
static const cache_provider cache_disk_provider =
{
};
static void disk_cache_register_hook(apr_pool_t *p)
{
    /* cache initializer */
    ap_register_provider(p, CACHE_PROVIDER_GROUP, "disk", "0",
                         &cache_disk_provider);
}
module AP_MODULE_DECLARE_DATA disk_cache_module = {
    STANDARD20_MODULE_STUFF,
    NULL,                       /* create per-directory config structure */
NULL, /* merge per-directory config structures */
create_config, /* create per-server config structure */
NULL, /* merge per-server config structures */
disk_cache_cmds, /* command apr_table_t */
disk_cache_register_hook /* register hooks */
};