mod_disk_cache.c revision 0c9166d0186cf0e1ad397025f730ae6967f44ce6
e609c337f729875bc20e01096c7e610f45356f54nilgun/* Copyright 2000-2006 The Apache Software Foundation or its licensors, as
e609c337f729875bc20e01096c7e610f45356f54nilgun * applicable.
e609c337f729875bc20e01096c7e610f45356f54nilgun * Licensed under the Apache License, Version 2.0 (the "License");
e609c337f729875bc20e01096c7e610f45356f54nilgun * you may not use this file except in compliance with the License.
e609c337f729875bc20e01096c7e610f45356f54nilgun * You may obtain a copy of the License at
e609c337f729875bc20e01096c7e610f45356f54nilgun * Unless required by applicable law or agreed to in writing, software
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * distributed under the License is distributed on an "AS IS" BASIS,
2e545ce2450a9953665f701bb05350f0d3f26275nd * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * See the License for the specific language governing permissions and
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * limitations under the License.
e609c337f729875bc20e01096c7e610f45356f54nilgun * mod_disk_cache: Disk Based HTTP 1.1 Cache.
e609c337f729875bc20e01096c7e610f45356f54nilgun * Flow to Find the .data file:
e609c337f729875bc20e01096c7e610f45356f54nilgun * Incoming client requests URI /foo/bar/baz
e609c337f729875bc20e01096c7e610f45356f54nilgun * Generate <hash> off of /foo/bar/baz
e609c337f729875bc20e01096c7e610f45356f54nilgun * Open <hash>.header
e609c337f729875bc20e01096c7e610f45356f54nilgun * Read in <hash>.header file (may contain Format #1 or Format #2)
e609c337f729875bc20e01096c7e610f45356f54nilgun * If format #1 (Contains a list of Vary Headers):
e609c337f729875bc20e01096c7e610f45356f54nilgun * Use each header name (from .header) with our request values (headers_in) to
e609c337f729875bc20e01096c7e610f45356f54nilgun * regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
e609c337f729875bc20e01096c7e610f45356f54nilgun * re-read in <hash>.header (must be format #2)
e609c337f729875bc20e01096c7e610f45356f54nilgun * read in <hash>.data
e609c337f729875bc20e01096c7e610f45356f54nilgun * Format #1:
30471a4650391f57975f60bbb6e4a90be7b284bfhumbedooh * apr_uint32_t format;
e609c337f729875bc20e01096c7e610f45356f54nilgun * apr_time_t expire;
e609c337f729875bc20e01096c7e610f45356f54nilgun * apr_array_t vary_headers (delimited by CRLF)
e609c337f729875bc20e01096c7e610f45356f54nilgun * Format #2:
e609c337f729875bc20e01096c7e610f45356f54nilgun * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
e609c337f729875bc20e01096c7e610f45356f54nilgun * entity name (dobj->name) [length is in disk_cache_info_t->name_len]
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * r->headers_out (delimited by CRLF)
e609c337f729875bc20e01096c7e610f45356f54nilgun * r->headers_in (delimited by CRLF)
e609c337f729875bc20e01096c7e610f45356f54nilgun/* Forward declarations */
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *b);
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
e609c337f729875bc20e01096c7e610f45356f54nilgun * Local static functions
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic char *header_file(apr_pool_t *p, disk_cache_conf *conf,
e609c337f729875bc20e01096c7e610f45356f54nilgun dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
e609c337f729875bc20e01096c7e610f45356f54nilgun return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic char *data_file(apr_pool_t *p, disk_cache_conf *conf,
e609c337f729875bc20e01096c7e610f45356f54nilgun dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
e609c337f729875bc20e01096c7e610f45356f54nilgun return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic void mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool)
e609c337f729875bc20e01096c7e610f45356f54nilgun if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
e609c337f729875bc20e01096c7e610f45356f54nilgun/* htcacheclean may remove directories underneath us.
e609c337f729875bc20e01096c7e610f45356f54nilgun * So, we'll try renaming three times at a cost of 0.002 seconds.
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t safe_file_rename(disk_cache_conf *conf,
e609c337f729875bc20e01096c7e610f45356f54nilgun /* 1000 micro-seconds aka 0.001 seconds. */
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t file_cache_el_final(disk_cache_object_t *dobj,
e609c337f729875bc20e01096c7e610f45356f54nilgun /* move the data over */
e609c337f729875bc20e01096c7e610f45356f54nilgun /* This assumes that the tempfile is on the same file system
e609c337f729875bc20e01096c7e610f45356f54nilgun * as the cache_root. If not, then we need a file copy/move
e609c337f729875bc20e01096c7e610f45356f54nilgun * rather than a rename.
e609c337f729875bc20e01096c7e610f45356f54nilgun rv = apr_file_rename(dobj->tempfile, dobj->datafile, r->pool);
e609c337f729875bc20e01096c7e610f45356f54nilgun "disk_cache: rename tempfile to datafile failed:"
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, request_rec *r)
e609c337f729875bc20e01096c7e610f45356f54nilgun /* Remove the header file and the body file. */
e609c337f729875bc20e01096c7e610f45356f54nilgun /* If we opened the temporary data file, close and remove it. */
e609c337f729875bc20e01096c7e610f45356f54nilgun/* These two functions get and put state information into the data
e609c337f729875bc20e01096c7e610f45356f54nilgun * file for an ap_cache_el, this state information will be read
e609c337f729875bc20e01096c7e610f45356f54nilgun * and written transparently to clients of this module
91f378b5a10f2d83820902ed10ba7967a3920c18nilgunstatic int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
e609c337f729875bc20e01096c7e610f45356f54nilgun /* read the data from the cache file */
e609c337f729875bc20e01096c7e610f45356f54nilgun /* Store it away so we can get it later. */
e609c337f729875bc20e01096c7e610f45356f54nilgun /* Note that we could optimize this by conditionally doing the palloc
e609c337f729875bc20e01096c7e610f45356f54nilgun * depending upon the size. */
e609c337f729875bc20e01096c7e610f45356f54nilgun urlbuff = apr_palloc(r->pool, disk_info.name_len + 1);
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun /* check that we have the same URL */
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun /* Would strncmp be correct? */
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgunstatic const char* regen_key(apr_pool_t *p, apr_table_t *headers,
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun const char *header;
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun const char **elts;
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * - Handle multiple-value headers better. (sort them?)
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * - Handle case-insensitive values better.
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * This isn't the end of the world, since it just lowers the cache
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * hit rate, but it would be nice to fix.
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * The majority are case insensitive if they are values (encoding etc).
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * Most of rfc2616 is case insensitive on header contents.
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * So the better solution may be to identify headers which should be
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * treated case-sensitive?
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * HTTP URI's (3.2.3) [host and scheme are insensitive]
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * HTTP method (5.1.1)
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * HTTP-date values (3.3.1)
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * 3.7 Media Types [excerpt]
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * The type, subtype, and parameter attribute names are case-
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * insensitive. Parameter values might or might not be case-sensitive,
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * depending on the semantics of the parameter name.
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * 14.20 Expect [excerpt]
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * Comparison of expectation values is case-insensitive for unquoted
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * tokens (including the 100-continue token), and is case-sensitive for
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun * quoted-string expectation-extensions.
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgunstatic int array_alphasort(const void *fn1, const void *fn2)
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgunstatic void tokens_to_array(apr_pool_t *p, const char *data,
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun while ((token = ap_get_list_item(p, &data)) != NULL) {
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
b9bf3918f6eaf7747bcbfbd02792bcbe4a052784nilgun sizeof(char *), array_alphasort);
e609c337f729875bc20e01096c7e610f45356f54nilgun * Hook and mod_cache callback functions
e609c337f729875bc20e01096c7e610f45356f54nilgunstatic int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len)
e609c337f729875bc20e01096c7e610f45356f54nilgun disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
e609c337f729875bc20e01096c7e610f45356f54nilgun /* Note, len is -1 if unknown so don't trust it too hard */
e609c337f729875bc20e01096c7e610f45356f54nilgun "disk_cache: URL %s failed the size check "
cc7e1025de9ac63bd4db6fe7f71c158b2cf09fe4humbedooh "disk_cache: URL %s failed the size check "
0d0ba3a410038e179b695446bb149cce6264e0abnd /* Allocate and initialize cache_object_t and disk_cache_object_t */
af33a4994ae2ff15bc67d19ff1a7feb906745bf8rbowen /* Save the cache root */
0d0ba3a410038e179b695446bb149cce6264e0abnd dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
e609c337f729875bc20e01096c7e610f45356f54nilgun dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
return OK;
const char *nkey;
static int error_logged = 0;
int flags;
if (!error_logged) {
return DECLINED;
return DECLINED;
return DECLINED;
return DECLINED;
return DECLINED;
return DECLINED;
#ifdef APR_SENDFILE_ENABLED
return DECLINED;
return DECLINED;
return OK;
return OK;
if (!dobj) {
return DECLINED;
return DECLINED;
return DECLINED;
const char *str_to_copy;
if (str_to_copy) {
dir);
return OK;
char w[MAX_STRING_LEN];
return rv;
p = strlen(w);
return APR_SUCCESS;
const char **elts;
&amt);
return rv;
&amt);
char w[MAX_STRING_LEN];
return rv;
p = strlen(w);
++maybeEBCDIC;
++maybeASCII;
r->filename);
return APR_EGENERAL;
while (*l && apr_isspace(*l)) {
return APR_SUCCESS;
return APR_NOTFOUND;
return APR_SUCCESS;
apr_bucket *e;
return APR_SUCCESS;
&amt);
return rv;
&amt);
return rv;
if (r->headers_out) {
const char *tmp;
if (tmp) {
r->pool);
return rv;
r->pool);
return rv;
return rv;
return rv;
if (r->headers_out) {
r->server);
&& r->content_type) {
r->err_headers_out);
return rv;
if (r->headers_in) {
r->server);
return rv;
return rv;
return APR_SUCCESS;
apr_bucket *e;
return rv;
e = APR_BUCKET_NEXT(e))
const char *str;
return rv;
return rv;
return APR_EGENERAL;
return APR_EGENERAL;
return APR_EGENERAL;
return APR_SUCCESS;
return conf;
return NULL;
return NULL;
return NULL;
return NULL;
return NULL;
{NULL}