mod_disk_cache.c revision 6d14bedccc13d7a5f95fb9328eb6d1a07b58731b
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "apr_lib.h"
#include "apr_file_io.h"
#include "apr_strings.h"
#include "mod_cache.h"
#include "mod_disk_cache.h"
#include "http_config.h"
#include "http_log.h"
#include "http_core.h"
#include "ap_provider.h"
#include "util_filter.h"
#include "util_script.h"
#include "util_charset.h"
/*
* mod_disk_cache: Disk Based HTTP 1.1 Cache.
*
* Flow to Find the .data file:
* Open <hash>.header
* Read in <hash>.header file (may contain Format #1 or Format #2)
* If format #1 (Contains a list of Vary Headers):
* Use each header name (from .header) with our request values (headers_in) to
* re-read in <hash>.header (must be format #2)
* read in <hash>.data
*
* Format #1:
* apr_uint32_t format;
* apr_time_t expire;
* apr_array_t vary_headers (delimited by CRLF)
*
* Format #2:
* disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
* entity name (dobj->name) [length is in disk_cache_info_t->name_len]
* r->headers_out (delimited by CRLF)
* CRLF
* r->headers_in (delimited by CRLF)
* CRLF
*/
/* Forward declarations */
static int remove_entity(cache_handle_t *h);
apr_file_t *file);
/*
* Local static functions
*/
{
}
}
else {
}
}
{
}
}
else {
}
}
{
char *p;
p = strchr(p, '/');
if (!p)
break;
*p = '\0';
return rv;
}
*p = '/';
++p;
}
return APR_SUCCESS;
}
/* htcacheclean may remove directories underneath us.
* So, we'll try renaming three times at a cost of 0.002 seconds.
*/
{
if (rv != APR_SUCCESS) {
int i;
/* 1000 micro-seconds aka 0.001 seconds. */
apr_sleep(1000);
if (rv != APR_SUCCESS)
continue;
}
}
return rv;
}
request_rec *r)
{
/* This assumes that the tempfiles are on the same file system
* as the cache_root. If not, then we need a file copy/move
* rather than a rename.
*/
/* move the file over */
if (rv != APR_SUCCESS) {
"disk_cache: rename tempfile to file failed:"
}
}
return rv;
}
/* clean up the temporary file */
}
return APR_SUCCESS;
}
{
return APR_SUCCESS;
}
/* These two functions get and put state information into the data
* file for an ap_cache_el. This state information is read and
* written transparently to clients of this module.
*/
{
char *urlbuff;
/* read the data from the cache file */
len = sizeof(disk_cache_info_t);
if (rv != APR_SUCCESS) {
return rv;
}
/* Store it away so we can get it later. */
/* Note that we could optimize this by conditionally doing the palloc
* depending upon the size. */
if (rv != APR_SUCCESS) {
return rv;
}
/* check that we have the same URL */
/* Would strncmp be correct? */
return APR_EGENERAL;
}
return APR_SUCCESS;
}
{
int i, k;
int nvec;
const char *header;
const char **elts;
/* TODO:
* - Handle multiple-value headers better. (sort them?)
* - Handle case-insensitive values better.
* This isn't the end of the world, since it just lowers the cache
* hit rate, but it would be nice to fix.
*
* The majority are case insensitive if they are values (encoding etc).
* Most of rfc2616 is case insensitive on header contents.
*
* So the better solution may be to identify headers which should be
* treated case-sensitive?
* HTTP URI's (3.2.3) [host and scheme are insensitive]
* HTTP method (5.1.1)
* HTTP-date values (3.3.1)
* 3.7 Media Types [excerpt]
* The type, subtype, and parameter attribute names are case-
* insensitive. Parameter values might or might not be case-sensitive,
* depending on the semantics of the parameter name.
* 14.20 Expect [excerpt]
* Comparison of expectation values is case-insensitive for unquoted
* tokens (including the 100-continue token), and is case-sensitive for
* quoted-string expectation-extensions.
*/
if (!header) {
header = "";
}
k++;
k++;
}
k++;
}
{
}
{
char *token;
}
/* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
sizeof(char *), array_alphasort);
}
/*
* Hook and mod_cache callback functions
*/
{
return DECLINED;
}
/* we don't support caching of range requests (yet) */
if (r->status == HTTP_PARTIAL_CONTENT) {
"disk_cache: URL %s partial content response not cached",
key);
return DECLINED;
}
/* Note, len is -1 if unknown so don't trust it too hard */
"disk_cache: URL %s failed the size check "
return DECLINED;
}
"disk_cache: URL %s failed the size check "
return DECLINED;
}
/* Allocate and initialize cache_object_t and disk_cache_object_t */
/* Save the cache root */
return OK;
}
{
const char *nkey;
static int error_logged = 0;
#ifdef APR_SENDFILE_ENABLED
&core_module);
#endif
int flags;
/* Look up entity keyed to 'url' */
if (!error_logged) {
error_logged = 1;
"disk_cache: Cannot cache files to disk without a CacheRoot specified.");
}
return DECLINED;
}
/* Create and init the cache object */
/* Open the headers file */
/* Save the cache root */
if (rc != APR_SUCCESS) {
return DECLINED;
}
/* read the format from the cache file */
if (format == VARY_FORMAT_VERSION) {
if (rc != APR_SUCCESS) {
"disk_cache: Cannot parse vary header file: %s",
return DECLINED;
}
if (rc != APR_SUCCESS) {
return DECLINED;
}
}
else if (format != DISK_FORMAT_VERSION) {
"disk_cache: File '%s' has a version mismatch. File had version: %d.",
return DECLINED;
}
else {
/* oops, not vary as it turns out */
/* This wasn't a Vary Format file, so we must seek to the
* start of the file again, so that later reads work.
*/
}
/* Open the data file */
#ifdef APR_SENDFILE_ENABLED
/* When we are in the quick handler we don't have the per-directory
* configuration, so this check only takes the global setting of
* the EnableSendFile directive into account.
*/
#endif
if (rc != APR_SUCCESS) {
return DECLINED;
}
if (rc == APR_SUCCESS) {
}
/* Read the bytes to setup the cache_info fields */
if (rc != APR_SUCCESS) {
return DECLINED;
}
/* Initialize the cache_handle callback functions */
return OK;
}
/*
 * Detach the cache object from a cache handle.
 *
 * Only the handle's reference is cleared; nothing is freed or deleted from
 * disk by this function.
 *
 * @param h  cache handle whose cache object reference is dropped
 * @return   OK, unconditionally
 */
static int remove_entity(cache_handle_t *h)
{
    /* Null out the cache object pointer so next time we start from scratch */
    h->cache_obj = NULL;
    return OK;
}
{
/* Get disk cache object from cache handle */
if (!dobj) {
return DECLINED;
}
/* Delete headers file */
/* This is only logged if httpd is started with -e debug.
* For the reason, see log_error_core for the case s == NULL.
*/
"disk_cache: Failed to delete headers file %s from cache.",
return DECLINED;
}
}
/* Delete data file */
/* This is only logged if httpd is started with -e debug.
* For the reason, see log_error_core for the case s == NULL.
*/
"disk_cache: Failed to delete data file %s from cache.",
return DECLINED;
}
}
/* now delete directories as far as possible up to our cache root */
const char *str_to_copy;
if (str_to_copy) {
/* remove filename */
*slash = '\0';
/*
* now walk our way back to the cache root, delete everything
* in the way as far as possible
*
* Note: due to the way we constructed the file names in
* header_file and data_file, we are guaranteed that the
* cache_root is suffixed by at least one '/' which will be
* turned into a terminating null by this loop. Therefore,
* we will neither delete nor go above our cache root.
*/
"disk_cache: Deleting directory %s from cache",
dir);
break;
}
*slash = '\0';
}
}
}
return OK;
}
{
char w[MAX_STRING_LEN];
int p;
while (1) {
if (rv != APR_SUCCESS) {
"Premature end of vary array.");
return rv;
}
p = strlen(w);
if (p > 0 && w[p - 1] == '\n') {
w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the array, break out of the loop. */
if (w[0] == '\0') {
break;
}
}
return APR_SUCCESS;
}
{
int i;
const char **elts;
&amt);
if (rv != APR_SUCCESS) {
return rv;
}
}
&amt);
}
{
char w[MAX_STRING_LEN];
char *l;
int p;
while (1) {
/* ### What about APR_EOF? */
if (rv != APR_SUCCESS) {
"Premature end of cache headers.");
return rv;
}
/* Delete terminal (CR?)LF */
p = strlen(w);
/* Indeed, the host's '\n':
'\012' for UNIX; '\015' for MacOS; '\025' for OS/390
-- whatever the script generates.
*/
if (p > 0 && w[p - 1] == '\n') {
w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the headers, break out of the loop. */
if (w[0] == '\0') {
break;
}
/* Chances are that we received an ASCII header text instead of
* the expected EBCDIC header lines. Try to auto-detect:
*/
if (!(l = strchr(w, ':'))) {
int maybeASCII = 0, maybeEBCDIC = 0;
++maybeEBCDIC;
++maybeASCII;
}
if (maybeASCII > maybeEBCDIC) {
"CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
r->filename);
w, &inbytes_left, w, &outbytes_left);
}
}
#endif /*APR_CHARSET_EBCDIC*/
/* if we see a bogus header don't ignore it. Shout and scream */
if (!(l = strchr(w, ':'))) {
return APR_EGENERAL;
}
*l++ = '\0';
while (*l && apr_isspace(*l)) {
++l;
}
apr_table_add(table, w, l);
}
return APR_SUCCESS;
}
/*
* Reads headers from a buffer and returns an array of headers.
* Returns NULL on file error
* This routine tries to deal with too long lines and continuation lines.
* @@@: XXX: FIXME: currently the headers are passed thru un-merged.
* Is that okay, or should they be collapsed where possible?
*/
{
/* This case should not happen... */
return APR_NOTFOUND;
}
return APR_SUCCESS;
}
{
apr_bucket *e;
return APR_SUCCESS;
}
{
int i;
&amt);
if (rv != APR_SUCCESS) {
return rv;
}
}
}
&amt);
return rv;
}
{
/* This is flaky... we need to manage the cache_info differently */
if (r->headers_out) {
const char *tmp;
if (tmp) {
/* If we were initially opened as a vary format, rollback
* that internal state for the moment so we can recreate the
* vary format hints in the appropriate directory.
*/
}
if (rv != APR_SUCCESS) {
"disk_cache: could not create temp file %s",
return rv;
}
}
}
if (rv != APR_SUCCESS) {
"disk_cache: could not create temp file %s",
return rv;
}
if (rv != APR_SUCCESS) {
"disk_cache: could not write info to header file %s",
return rv;
}
if (r->headers_out) {
if (rv != APR_SUCCESS) {
"disk_cache: could not write out-headers to header file %s",
return rv;
}
}
/* Parse the vary header and dump those fields from the headers_in. */
/* FIXME: Make call to the same thing cache_select calls to crack Vary. */
if (r->headers_in) {
if (rv != APR_SUCCESS) {
"disk_cache: could not write in-headers to header file %s",
return rv;
}
}
return APR_SUCCESS;
}
{
apr_bucket *e;
int seen_eos = 0;
/* We write to a temp file and then atomically rename the file over
* in file_cache_el_final().
*/
if (rv != APR_SUCCESS) {
return rv;
}
}
}
}
}
}
const char *str;
e = APR_BRIGADE_FIRST(in);
/* are we done completely? if so, pass any trailing buckets right through */
continue;
}
/* have we seen eos yet? */
if (APR_BUCKET_IS_EOS(e)) {
seen_eos = 1;
break;
}
/* honour flush buckets, we'll get called again */
if (APR_BUCKET_IS_FLUSH(e)) {
break;
}
/* metadata buckets are preserved as is */
if (APR_BUCKET_IS_METADATA(e)) {
continue;
}
/* read the bucket, write to the cache */
if (rv != APR_SUCCESS) {
"disk_cache: Error when reading bucket for URL %s",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return rv;
}
/* write to the cache, leave if we fail */
if (rv != APR_SUCCESS) {
"disk_cache: Error when writing cache file for URL %s",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return rv;
}
"disk_cache: URL %s failed the size check "
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
/* have we reached the limit of how much we're prepared to write in one
* go? If so, leave, we'll get called again. This prevents us from trying
* to swallow too much data at once, or taking so long to write the data
* the client times out.
*/
break;
}
break;
}
}
/* Was this the final bucket? If yes, close the temp file and perform
* sanity checks.
*/
if (seen_eos) {
"disk_cache: Discarding body for URL %s "
"because connection has been aborted.",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
"disk_cache: URL %s failed the size check "
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
if (cl_header) {
"disk_cache: URL %s didn't receive complete response, not caching",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
}
/* All checks were fine, we're good to go when the commit comes */
}
return APR_SUCCESS;
}
{
/* move header and data tempfiles to the final destination */
if (APR_SUCCESS == rv) {
}
if (APR_SUCCESS == rv) {
}
/* remove the cached items completely on any failure */
if (APR_SUCCESS != rv) {
remove_url(h, r->pool);
"disk_cache: commit_entity: URL '%s' not cached due to earlier disk error.",
}
else {
"disk_cache: commit_entity: Headers and body for URL %s cached.",
}
return APR_SUCCESS;
}
{
return dconf;
}
return new;
}
{
/* XXX: Set default values */
conf->cache_root_len = 0;
return conf;
}
/*
* mod_disk_cache configuration directives handlers.
*/
static const char
{
/* TODO: canonicalize cache_root and strip off any trailing slashes */
return NULL;
}
/*
* Consider eliminating the next two directives in favor of
* Ian's prime number hash...
* key = hash_fn( r->uri)
*/
static const char
{
if (val < 1)
return "CacheDirLevels value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
if (val < 1)
return "CacheDirLength value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
{
return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
}
return NULL;
}
static const char
{
{
return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
}
return NULL;
}
static const char
{
{
return "CacheReadSize argument must be a non-negative integer representing the max amount of data to cache in go.";
}
return NULL;
}
static const char
{
milliseconds < 0)
{
return "CacheReadTime argument must be a non-negative integer representing the max amount of time taken to cache in go.";
}
return NULL;
}
/*
 * Configuration directive table for this module; it is the entry passed in
 * the command-table slot of the module definition. The list is terminated
 * by the {NULL} sentinel entry.
 * NOTE(review): only the help strings of each entry are visible here -- the
 * opening of every directive entry (directive name, handler, context) appears
 * to be missing; confirm against the upstream file before building.
 */
static const command_rec disk_cache_cmds[] =
{
"The directory to store cache files"),
"The number of levels of subdirectories in the cache"),
"The number of characters in subdirectory names"),
"The minimum file size to cache a document"),
"The maximum file size to cache a document"),
"The maximum quantity of data to attempt to read and cache in one go"),
"The maximum time taken to attempt to read and cache in go"),
{NULL}
};
/*
 * cache_provider instance for the disk cache.
 * NOTE(review): the initializer is empty in this view; presumably the
 * provider callback members belong here -- confirm against the upstream file.
 */
static const cache_provider cache_disk_provider =
{
};
/*
 * Hook-registration callback for this module; it is the entry listed in the
 * "register hooks" slot of the module definition.
 *
 * @param p  pool supplied by the caller (unused by the visible body)
 *
 * NOTE(review): no statements are visible in this body -- whatever the
 * "cache initializer" comment refers to appears to be missing; confirm
 * against the upstream file.
 */
static void disk_cache_register_hook(apr_pool_t *p)
{
/* cache initializer */
}
create_dir_config, /* create per-directory config structure */
merge_dir_config, /* merge per-directory config structures */
create_config, /* create per-server config structure */
NULL, /* merge per-server config structures */
disk_cache_cmds, /* command apr_table_t */
disk_cache_register_hook /* register hooks */
};