mod_disk_cache.c revision 88d0e50f16b21d4d0af0a48da7ad28fb5991834c
/* Copyright 2000-2006 The Apache Software Foundation or its licensors, as
* applicable.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "apr_file_io.h"
#include "apr_strings.h"
#include "mod_cache.h"
#include "mod_disk_cache.h"
#include "ap_provider.h"
#include "util_filter.h"
#include "util_script.h"
#include "util_charset.h"
/*
* mod_disk_cache: Disk Based HTTP 1.1 Cache.
*
* Flow to Find the .data file:
* Open <hash>.header
* Read in <hash>.header file (may contain Format #1 or Format #2)
* If format #1 (Contains a list of Vary Headers):
* Use each header name (from .header) with our request values (headers_in) to
* re-read in <hash>.header (must be format #2)
* read in <hash>.data
*
* Format #1:
* apr_uint32_t format;
* apr_time_t expire;
* apr_array_t vary_headers (delimited by CRLF)
*
* Format #2:
* disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
* entity name (dobj->name) [length is in disk_cache_info_t->name_len]
* r->headers_out (delimited by CRLF)
* CRLF
* r->headers_in (delimited by CRLF)
* CRLF
*/
/* Forward declarations */
static int remove_entity(cache_handle_t *h);
apr_file_t *file);
/*
* Local static functions
*/
{
}
}
else {
}
}
{
}
}
else {
}
}
{
char *p;
p = strchr(p, '/');
if (!p)
break;
*p = '\0';
/* XXX */
}
*p = '/';
++p;
}
}
/* htcacheclean may remove directories underneath us.
* So, we'll try renaming three times at a cost of 0.002 seconds.
*/
{
if (rv != APR_SUCCESS) {
int i;
/* 1000 micro-seconds aka 0.001 seconds. */
apr_sleep(1000);
}
}
return rv;
}
request_rec *r)
{
/* move the data over */
/* This assumes that the tempfile is on the same file system
* as the cache_root. If not, then we need a file copy/move
* rather than a rename.
*/
if (rv != APR_SUCCESS) {
"disk_cache: rename tempfile to datafile failed:"
}
}
return APR_SUCCESS;
}
{
/* Remove the header file and the body file. */
/* If we opened the temporary data file, close and remove it. */
}
return APR_SUCCESS;
}
/* These two functions get and put state information into the data
* file for an ap_cache_el. This state information is read and
* written transparently to clients of this module.
*/
{
char *urlbuff;
/* read the data from the cache file */
len = sizeof(disk_cache_info_t);
if (rv != APR_SUCCESS) {
return rv;
}
/* Store it away so we can get it later. */
/* Note that we could optimize this by conditionally doing the palloc
* depending upon the size. */
if (rv != APR_SUCCESS) {
return rv;
}
/* check that we have the same URL */
/* Would strncmp be correct? */
return APR_EGENERAL;
}
return APR_SUCCESS;
}
{
int i, k;
int nvec;
const char *header;
const char **elts;
/* TODO:
* - Handle multiple-value headers better. (sort them?)
* - Handle case-insensitive values better.
* This isn't the end of the world, since it just lowers the cache
* hit rate, but it would be nice to fix.
*
* The majority are case insensitive if they are values (encoding etc).
* Most of rfc2616 is case insensitive on header contents.
*
* So the better solution may be to identify headers which should be
* treated case-sensitive?
* HTTP URI's (3.2.3) [host and scheme are insensitive]
* HTTP method (5.1.1)
* HTTP-date values (3.3.1)
* 3.7 Media Types [excerpt]
* The type, subtype, and parameter attribute names are case-
* insensitive. Parameter values might or might not be case-sensitive,
* depending on the semantics of the parameter name.
* 14.20 Expect [excerpt]
* Comparison of expectation values is case-insensitive for unquoted
* tokens (including the 100-continue token), and is case-sensitive for
* quoted-string expectation-extensions.
*/
if (!header) {
header = "";
}
k++;
k++;
}
k++;
}
{
}
{
char *token;
}
/* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
sizeof(char *), array_alphasort);
}
/*
* Hook and mod_cache callback functions
*/
{
return DECLINED;
}
/* Note, len is -1 if unknown so don't trust it too hard */
"disk_cache: URL %s failed the size check "
return DECLINED;
}
"disk_cache: URL %s failed the size check "
return DECLINED;
}
/* Allocate and initialize cache_object_t and disk_cache_object_t */
/* Save the cache root */
return OK;
}
{
const char *nkey;
static int error_logged = 0;
int flags;
/* Look up entity keyed to 'url' */
if (!error_logged) {
error_logged = 1;
"disk_cache: Cannot cache files to disk without a CacheRoot specified.");
}
return DECLINED;
}
/* Create and init the cache object */
/* Open the headers file */
/* Save the cache root */
if (rc != APR_SUCCESS) {
return DECLINED;
}
/* read the format from the cache file */
if (format == VARY_FORMAT_VERSION) {
if (expire < r->request_time) {
return DECLINED;
}
if (rc != APR_SUCCESS) {
"disk_cache: Cannot parse vary header file: %s",
return DECLINED;
}
if (rc != APR_SUCCESS) {
return DECLINED;
}
}
else if (format != DISK_FORMAT_VERSION) {
"disk_cache: File '%s' has a version mismatch. File had version: %d.",
return DECLINED;
}
else {
/* This wasn't a Vary Format file, so we must seek to the
* start of the file again, so that later reads work.
*/
}
/* Open the data file */
#ifdef APR_SENDFILE_ENABLED
#endif
if (rc != APR_SUCCESS) {
/* XXX: Log message */
return DECLINED;
}
if (rc == APR_SUCCESS) {
}
/* Read the bytes to setup the cache_info fields */
if (rc != APR_SUCCESS) {
/* XXX log message */
return DECLINED;
}
/* Initialize the cache_handle callback functions */
return OK;
}
/*
 * remove_entity: takes a cache handle and, in the body visible here,
 * unconditionally returns OK.
 *
 * NOTE(review): the inline comment below describes clearing the handle's
 * cache object pointer, but no such statement is visible in this body.
 * Confirm against the complete source before relying on that behaviour.
 */
static int remove_entity(cache_handle_t *h)
{
/* Null out the cache object pointer so next time we start from scratch */
return OK;
}
{
/* Get disk cache object from cache handle */
if (!dobj) {
return DECLINED;
}
/* Delete headers file */
/* Will only result in an output if httpd is started with -e debug.
* For reason see log_error_core for the case s == NULL.
*/
"disk_cache: Failed to delete headers file %s from cache.",
return DECLINED;
}
}
/* Delete data file */
/* Will only result in an output if httpd is started with -e debug.
* For reason see log_error_core for the case s == NULL.
*/
"disk_cache: Failed to delete data file %s from cache.",
return DECLINED;
}
}
/* now delete directories as far as possible up to our cache root */
const char *str_to_copy;
if (str_to_copy) {
/* remove filename */
*slash = '\0';
/*
* now walk our way back to the cache root, delete everything
* in the way as far as possible
*
* Note: due to the way we constructed the file names in
* header_file and data_file, we are guaranteed that the
* cache_root is suffixed by at least one '/' which will be
* turned into a terminating null by this loop. Therefore,
* we will neither delete nor go above our cache root.
*/
"disk_cache: Deleting directory %s from cache",
dir);
break;
}
*slash = '\0';
}
}
}
return OK;
}
{
char w[MAX_STRING_LEN];
int p;
while (1) {
if (rv != APR_SUCCESS) {
"Premature end of vary array.");
return rv;
}
p = strlen(w);
if (p > 0 && w[p - 1] == '\n') {
w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the array, break out of the loop. */
if (w[0] == '\0') {
break;
}
}
return APR_SUCCESS;
}
{
int i;
const char **elts;
&amt);
if (rv != APR_SUCCESS) {
return rv;
}
}
&amt);
}
{
char w[MAX_STRING_LEN];
char *l;
int p;
while (1) {
/* ### What about APR_EOF? */
if (rv != APR_SUCCESS) {
"Premature end of cache headers.");
return rv;
}
/* Delete terminal (CR?)LF */
p = strlen(w);
/* Indeed, the host's '\n':
'\012' for UNIX; '\015' for MacOS; '\025' for OS/390
-- whatever the script generates.
*/
if (p > 0 && w[p - 1] == '\n') {
w[p - 2] = '\0';
}
else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the headers, break out of the loop. */
if (w[0] == '\0') {
break;
}
/* Chances are that we received an ASCII header text instead of
* the expected EBCDIC header lines. Try to auto-detect:
*/
if (!(l = strchr(w, ':'))) {
int maybeASCII = 0, maybeEBCDIC = 0;
++maybeEBCDIC;
++maybeASCII;
}
if (maybeASCII > maybeEBCDIC) {
"CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
r->filename);
w, &inbytes_left, w, &outbytes_left);
}
}
#endif /*APR_CHARSET_EBCDIC*/
/* if we see a bogus header don't ignore it. Shout and scream */
if (!(l = strchr(w, ':'))) {
return APR_EGENERAL;
}
*l++ = '\0';
while (*l && apr_isspace(*l)) {
++l;
}
apr_table_add(table, w, l);
}
return APR_SUCCESS;
}
/*
* Reads headers from a buffer and returns an array of headers.
* Returns NULL on file error
* This routine tries to deal with too long lines and continuation lines.
* @@@: XXX: FIXME: currently the headers are passed thru un-merged.
* Is that okay, or should they be collapsed where possible?
*/
{
/* This case should not happen... */
/* XXX log message */
return APR_NOTFOUND;
}
return APR_SUCCESS;
}
{
apr_bucket *e;
bb->bucket_alloc);
return APR_SUCCESS;
}
{
int i;
&amt);
if (rv != APR_SUCCESS) {
return rv;
}
}
}
&amt);
return rv;
}
{
/* This is flaky... we need to manage the cache_info differently */
if (r->headers_out) {
const char *tmp;
if (tmp) {
r->pool);
if (rv != APR_SUCCESS) {
return rv;
}
r->pool);
if (rv != APR_SUCCESS) {
"disk_cache: rename tempfile to varyfile failed: %s -> %s",
return rv;
}
}
}
if (rv != APR_SUCCESS) {
return rv;
}
if (rv != APR_SUCCESS) {
return rv;
}
if (r->headers_out) {
r->server);
&& r->content_type) {
ap_make_content_type(r, r->content_type));
}
r->err_headers_out);
if (rv != APR_SUCCESS) {
return rv;
}
}
/* Parse the vary header and dump those fields from the headers_in. */
/* FIXME: Make call to the same thing cache_select calls to crack Vary. */
if (r->headers_in) {
r->server);
if (rv != APR_SUCCESS) {
return rv;
}
}
/* Remove old file with the same name. If remove fails, then
* perhaps we need to create the directory tree where we are
* about to write the new headers file.
*/
if (rv != APR_SUCCESS) {
}
if (rv != APR_SUCCESS) {
"disk_cache: rename tempfile to hdrsfile failed: %s -> %s",
return rv;
}
return APR_SUCCESS;
}
{
apr_bucket *e;
/* We write to a temp file and then atomically rename the file over
* in file_cache_el_final().
*/
if (rv != APR_SUCCESS) {
return rv;
}
}
for (e = APR_BRIGADE_FIRST(bb);
e != APR_BRIGADE_SENTINEL(bb);
e = APR_BUCKET_NEXT(e))
{
const char *str;
if (rv != APR_SUCCESS) {
"disk_cache: Error when reading bucket for URL %s",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return rv;
}
if (rv != APR_SUCCESS) {
"disk_cache: Error when writing cache file for URL %s",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return rv;
}
"disk_cache: URL %s failed the size check "
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
}
/* Was this the final bucket? If yes, close the temp file and perform
* sanity checks.
*/
"disk_cache: Discarding body for URL %s "
"because connection has been aborted.",
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
"disk_cache: URL %s failed the size check "
/* Remove the intermediate cache file and return non-APR_SUCCESS */
return APR_EGENERAL;
}
/* All checks were fine. Move tempfile to final destination */
/* Link to the perm file, and close the descriptor */
file_cache_el_final(dobj, r);
}
return APR_SUCCESS;
}
{
/* XXX: Set default values */
conf->cache_root_len = 0;
return conf;
}
/*
* mod_disk_cache configuration directives handlers.
*/
static const char
{
/* TODO: canonicalize cache_root and strip off any trailing slashes */
return NULL;
}
/*
* Consider eliminating the next two directives in favor of
* Ian's prime number hash...
* key = hash_fn( r->uri)
*/
static const char
{
if (val < 1)
return "CacheDirLevels value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
if (val < 1)
return "CacheDirLength value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return NULL;
}
static const char
{
return NULL;
}
static const char
{
return NULL;
}
/*
 * Configuration directive table for this module; it is the table named in
 * the "command" slot of the module record at the end of the file.
 * The list is terminated by a {NULL} sentinel entry.
 *
 * NOTE(review): only the trailing description string of each entry is
 * visible here (presumably the help text of one directive each); the
 * directive names and handler functions must be confirmed against the
 * complete source.
 */
static const command_rec disk_cache_cmds[] =
{
"The directory to store cache files"),
"The number of levels of subdirectories in the cache"),
"The number of characters in subdirectory names"),
"The minimum file size to cache a document"),
"The maximum file size to cache a document"),
{NULL}
};
/*
 * The cache_provider instance for the disk cache.
 *
 * NOTE(review): no initializers are visible in this view; presumably the
 * members are the entity callbacks defined in this file -- confirm against
 * the complete source.
 */
static const cache_provider cache_disk_provider =
{
};
/*
 * disk_cache_register_hook: the function named in the "register hooks"
 * slot of the module record at the end of the file.
 *
 * p: an APR pool; it is not used by any statement visible here.
 *
 * NOTE(review): the body shown contains only a comment; the actual
 * registration call(s) are not visible in this view.
 */
static void disk_cache_register_hook(apr_pool_t *p)
{
/* cache initializer */
}
NULL, /* create per-directory config structure */
NULL, /* merge per-directory config structures */
create_config, /* create per-server config structure */
NULL, /* merge per-server config structures */
disk_cache_cmds, /* command apr_table_t */
disk_cache_register_hook /* register hooks */
};