mod_cache.c revision bc786d0dba3cefae135fb85ab303ddb2b229e43d
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define CORE_PRIVATE
#include "mod_cache.h"
/* -------------------------------------------------------------- */
/* Handles for cache filters, resolved at startup to eliminate
* a name-to-function mapping on each request
*/
static ap_filter_rec_t *cache_save_filter_handle;
static ap_filter_rec_t *cache_out_filter_handle;
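#if 0
/* Illustrative sketch, not part of the original module: with the filter
 * handles resolved once at registration time (see register_hooks() at the
 * bottom of this file), a handler can install a cache filter directly via
 * ap_add_output_filter_handle() instead of paying for a name lookup on
 * every request.
 */
static void sketch_install_cache_out(request_rec *r)
{
    ap_add_output_filter_handle(cache_out_filter_handle, NULL,
                                r, r->connection);
}
#endif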
/*
* CACHE handler
* -------------
*
* Can we deliver this request from the cache?
* If yes:
* deliver the content by installing the CACHE_OUT filter.
* If no:
* check whether we're allowed to try to cache it
* If yes:
* add CACHE_SAVE filter
* If no:
* oh well.
*/
{
const char *auth;
/* Delay initialization until we know we are handling a GET */
if (r->method_number != M_GET) {
return DECLINED;
}
&cache_module);
/*
* Which cache module (if any) should handle this request?
*/
return DECLINED;
}
/* make space for the per request config */
&cache_module);
if (!cache) {
}
/* save away the possible providers */
/*
* Are we allowed to serve cached info at all?
*/
/* find certain cache controlling headers */
auth = apr_table_get(r->headers_in, "Authorization");
/* First things first - does the request allow us to return
* cached information at all? If not, just decline the request.
*/
if (auth) {
return DECLINED;
}
/*
* Try to serve this request from the cache.
*
* If no existing cache file (DECLINED)
* add cache_save filter
* If cached file (OK)
* clear filter stack
* add cache_out filter
* return OK
*/
rv = cache_select(r);
if (!lookup) {
/*
* Add cache_save filter to cache this request. Choose
* the correct filter by checking if we are a subrequest
* or not.
*/
if (r->main) {
r->server,
"Adding CACHE_SAVE_SUBREQ filter for %s",
r->uri);
NULL, r, r->connection);
}
else {
r->server, "Adding CACHE_SAVE filter for %s",
r->uri);
NULL, r, r->connection);
}
"Adding CACHE_REMOVE_URL filter for %s",
r->uri);
/* Add cache_remove_url filter to this request to remove a
* stale cache entry if needed. Also put the current cache
* request rec in the filter context, as the request that
* is available later, when the filter runs, may be
* different due to an internal redirect.
*/
cache, r, r->connection);
}
else {
if (cache->stale_headers) {
r->server, "Restoring request headers for %s",
r->uri);
}
/* Delete our per-request configuration. */
}
}
else {
/* error */
"cache: error returned while checking for cached "
}
return DECLINED;
}
/* if we are a lookup, we are exiting soon one way or another; restore
* the headers. */
if (lookup) {
if (cache->stale_headers) {
"Restoring request headers.");
}
/* Delete our per-request configuration. */
}
rv = ap_meets_conditions(r);
/* If we are a lookup, we have to return DECLINED as we have no
* way of knowing if we will be able to serve the content.
*/
if (lookup) {
return DECLINED;
}
/* Return cached status. */
return rv;
}
/* If we're a lookup, we can exit now instead of serving the content. */
if (lookup) {
return OK;
}
/* Serve up the content */
/* We are in the quick handler hook, which means that no output
* filters have been set. So let's run the insert_filter hook.
*/
/*
* Add cache_out filter to serve this request. Choose
* the correct filter by checking if we are a subrequest
* or not.
*/
if (r->main) {
}
else {
}
/*
* Remove all filters that are before the cache_out filter. This ensures
* that we kick off the filter stack with our cache_out filter being the
* first in the chain. This makes sense because we want to restore things
* in the same manner as we saved them.
* There may be filters before our cache_out filter, because
*
* 1. We call ap_set_content_type during cache_select. This causes
* Content-Type specific filters to be added.
* 2. We call the insert_filter hook. This causes filters e.g. like
* the ones set with SetOutputFilter to be added.
*/
next = r->output_filters;
}
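#if 0
    /* Illustrative sketch of the pruning loop whose body is elided above
     * (main-request case): walk from the head of the chain and drop every
     * filter that sits in front of our CACHE_OUT filter, identified by the
     * registered handle. Sketch only, not the original code.
     */
    while (next && (next->frec != cache_out_filter_handle)) {
        ap_filter_t *tmp = next->next;
        ap_remove_output_filter(next);
        next = tmp;
    }
#endif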
/* kick off the filter stack */
if (rv != APR_SUCCESS) {
if(rv != AP_FILTER_ERROR) {
"cache: error returned while trying to return %s "
"cached data",
}
return rv;
}
return OK;
}
/*
* CACHE_OUT filter
* ----------------
*
* Deliver cached content (headers and body) up the stack.
*/
{
request_rec *r = f->r;
&cache_module);
if (!cache) {
/* user likely configured CACHE_OUT manually; they should use mod_cache
* configuration to do that */
"CACHE_OUT enabled unexpectedly");
}
"cache: running CACHE_OUT filter");
/* restore status of cached response */
/* XXX: This exposes a bug in mem_cache, since it does not
* restore the status into its handle. */
/* recall_headers() was called in cache_select() */
/* This filter is done once it has served up its content */
"cache: serving %s", r->uri);
}
/*
* CACHE_SAVE filter
* ---------------
*
* Decide whether or not this content should be cached.
* If we decide no it should not:
* remove the filter from the chain
* If we decide yes it should:
* Have we already started saving the response?
* If we have started, pass the data to the storage manager via store_body
* Otherwise:
* Check to see if we *can* save this particular response.
* If we can, call cache_create_entity() and save the headers and body
* Finally, pass the data to the next filter (the network or whatever)
*/
{
int date_in_errhdr = 0;
request_rec *r = f->r;
char *reason;
apr_pool_t *p;
&cache_module);
/* Setup cache_request_rec */
&cache_module);
if (!cache) {
/* user likely configured CACHE_SAVE manually; they should really use
* mod_cache configuration to do that
*/
}
p = r->pool;
/*
* Pass Data to Cache
* ------------------
* This section passes the brigades into the cache modules, but only
* if the setup section (see below) is complete.
*/
if (cache->block_response) {
/* We've already sent down the response and EOS. So, ignore
* whatever comes now.
*/
return APR_SUCCESS;
}
/* have we already run the cachability check and set up the
* cached file handle?
*/
if (cache->in_checked) {
/* pass the brigades into the cache, then pass them
* up the filter stack
*/
}
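#if 0
    /* Illustrative sketch of the fast path whose body is elided just above:
     * hand the brigade to the provider and forward it unchanged up the
     * stack. Assumes the store_body() member of the provider vtable declared
     * in mod_cache.h; this is not the original code.
     */
    rv = cache->provider->store_body(cache->handle, r, in);
    if (rv != APR_SUCCESS) {
        ap_remove_output_filter(f);
    }
    return ap_pass_brigade(f->next, in);
#endif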
/*
* Setup Data in Cache
* -------------------
* This section opens the cache entity and sets various caching
* parameters, and decides whether this URL should be cached at
* all. This section is run *before* the above section.
*/
/* read expiry date; if a bad date, then leave it so the client can
* read it
*/
}
}
}
else {
exp = APR_DATE_BAD;
}
/* read the last-modified date; if the date is bad, then delete it */
}
if (lastmod == APR_DATE_BAD) {
}
}
else {
}
/* read the etag and cache-control from the entity */
}
}
/*
* what responses should we not cache?
*
* At this point we decide based on the response headers whether it
* is appropriate _NOT_ to cache the data from the server. There are
* a whole lot of conditions that prevent us from caching this data.
* They are tested here one by one to be clear and unambiguous.
*/
&& r->status != HTTP_MULTIPLE_CHOICES
&& r->status != HTTP_MOVED_PERMANENTLY
&& r->status != HTTP_NOT_MODIFIED) {
/* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
* We don't cache 206, because we don't (yet) cache partial responses.
* We include 304 Not Modified here too as this is the origin server
* telling us to serve the cached copy.
*/
}
/* if a broken Expires header is present, don't cache it */
}
{
/* if an Expires header is in the past, don't cache it */
reason = "Expires header already expired, not cacheable";
}
/* if query string present but no expiration time, don't cache it
* (RFC 2616/13.9)
*/
reason = "Query string present but no expires header";
}
else if (r->status == HTTP_NOT_MODIFIED &&
/* if the server said 304 Not Modified but we have no cache
* file - pass this untouched to the user agent, it's not for us.
*/
reason = "HTTP Status 304 Not Modified";
}
/* 200 OK response from HTTP/1.0 and up without Last-Modified,
* Etag, or Expires headers.
*/
* Note: mod_include clears last_modified/expires/etags - this
* is why we have an optional function for a key-gen ;-)
*/
reason = "No Last-Modified, Etag, or Expires headers";
}
else if (r->header_only) {
/* HEAD requests */
reason = "HTTP HEAD request";
}
else if (!conf->store_nostore &&
/* RFC2616 14.9.2 Cache-Control: no-store response
* indicating do not cache, or stop now if you are
* trying to cache it.
*/
/* FIXME: The Cache-Control: no-store could have come in on a 304,
* FIXME: while the original request wasn't conditional. IOW, we
* FIXME: made the request conditional earlier to revalidate
* FIXME: our cached response.
*/
reason = "Cache-Control: no-store present";
}
else if (!conf->store_private &&
/* RFC2616 14.9.1 Cache-Control: private response
* this object is marked for this user's eyes only. Behave
* as a tunnel.
*/
/* FIXME: See above (no-store) */
reason = "Cache-Control: private present";
}
/* RFC2616 14.8 Authorisation:
* if authorisation is included in the request, we don't cache,
* but we can cache if the following exceptions are true:
* 1) If Cache-Control: s-maxage is included
* 2) If Cache-Control: must-revalidate is included
* 3) If Cache-Control: public is included
*/
reason = "Authorization required";
}
else if (ap_cache_liststr(NULL,
"*", NULL)) {
reason = "Vary header contains '*'";
}
else if (r->no_cache) {
/* or we've been asked not to cache it above */
reason = "r->no_cache present";
}
if (reason) {
"cache: %s not cached. Reason: %s", r->unparsed_uri,
reason);
/* remove this filter from the chain */
/* ship the data up the stack */
}
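#if 0
    /* Illustrative sketch of the decline path whose body is elided above:
     * once a reason not to cache has been found, the save filter takes
     * itself out of the chain and simply forwards the data. Sketch only,
     * not the original code.
     */
    ap_remove_output_filter(f);
    return ap_pass_brigade(f->next, in);
#endif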
/* Make it so that we don't execute this path again. */
/* Set the content length if known.
*/
}
if (cl) {
char *errp;
}
}
if (!cl) {
/* if we don't get the content-length, see if we have all the
* buckets and use their length to calculate the size
*/
apr_bucket *e;
int all_buckets_here=0;
int unresolved_length = 0;
size=0;
for (e = APR_BRIGADE_FIRST(in);
e != APR_BRIGADE_SENTINEL(in);
e = APR_BUCKET_NEXT(e))
{
if (APR_BUCKET_IS_EOS(e)) {
all_buckets_here=1;
break;
}
if (APR_BUCKET_IS_FLUSH(e)) {
unresolved_length = 1;
continue;
}
if (e->length == (apr_size_t)-1) {
break;
}
size += e->length;
}
}
if (!all_buckets_here) {
size = -1;
}
}
/* It's safe to cache the response.
*
* There are two possibilities at this point:
* - cache->handle == NULL. In this case there is no previously
* cached entity anywhere on the system. We must create a brand
* new entity and store the response in it.
* - cache->stale_handle != NULL. In this case there is a stale
* entity in the system which needs to be replaced by new
* content (unless the result was 304 Not Modified, which means
* the cached entity is actually fresh, and we should update
* the headers).
*/
/* Did we have a stale cache entry that really is stale? */
if (cache->stale_handle) {
if (r->status == HTTP_NOT_MODIFIED) {
/* Oh, hey. It isn't that stale! Yay! */
}
else {
/* Oh, well. Toss it. */
/* Treat the request as if it wasn't conditional. */
}
}
/* no cache handle, create a new entity */
/* We only set info->status upon the initial creation. */
}
/* Caching layer declined the opportunity to cache the response */
}
"cache: Caching url: %s", r->unparsed_uri);
/* We are actually caching this response. So it does not
* make sense to remove this entity any more.
*/
"cache: Removing CACHE_REMOVE_URL filter.");
/*
* We now want to update the cache file header information with
* the new date, last modified, expire and content length and write
* it away to our cache file. First, we determine these values from
* the response, using heuristics if appropriate.
*
* In addition, we make HTTP/1.1 age calculations and write them away
* too.
*/
/* Read the date. Generate one if one is not supplied */
date_in_errhdr = 1;
}
else {
}
}
else {
}
now = apr_time_now();
char *dates;
/* no date header (or bad header)! */
/* add one; N.B. use the time _now_ rather than when we were checking
* the cache
*/
if (date_in_errhdr == 1) {
}
"cache: Added date header");
}
else {
}
/* set response_time for HTTP/1.1 age calculations */
/* get the request time */
/* check last-modified date */
/* if it's in the future, then replace by date */
r->server,
"cache: Last modified is in the future, "
"replacing with now");
}
/* if no expiry date then
* if lastmod
* expiry date = date + min((date - lastmod) * factor, maxexpire)
* else
* expire date = date + defaultexpire
*/
if (exp == APR_DATE_BAD) {
char expire_hdr[APR_RFC822_DATE_LEN];
/* if lastmod == date then you get 0*conf->factor which results in
* an expiration time of now. This causes some problems with
* freshness calculations, so we choose the else path...
*/
}
}
}
else {
}
}
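#if 0
    /* Illustrative sketch of the expiry heuristic described above. The
     * cache_server_conf field names used here (factor, maxex, defex) are
     * assumptions based on the directives at the bottom of this file; this
     * is not the original code, which is elided in this listing.
     */
    if (exp == APR_DATE_BAD) {
        if (lastmod != APR_DATE_BAD && lastmod < date) {
            apr_time_t x = (apr_time_t) ((date - lastmod) * conf->factor);
            exp = date + ((x > conf->maxex) ? conf->maxex : x);
        }
        else {
            exp = date + conf->defex;
        }
    }
#endif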
/* We found a stale entry which wasn't really stale. */
if (cache->stale_handle) {
/* Load in the saved status and clear the status line. */
r->status_line = NULL;
/* RFC 2616 10.3.5 states that entity headers are not supposed
* to be in the 304 response. Therefore, we need to combine the
* response headers with the cached headers *before* we update
* the cached headers.
*
* However, before doing that, we need to first merge in
* err_headers_out and we also need to strip any hop-by-hop
* headers that might have snuck in.
*/
r->err_headers_out);
r->server);
/* Merge in our cached headers. However, keep any updated values. */
}
/* Write away header information to cache. It is possible that we are
* trying to update headers for an entity which has already been cached.
*
* This may fail, due to an unwritable cache area. E.g. filesystem full,
* permissions problems or a read-only (re)mount. This must be handled
* later.
*/
/* Did we just update the cached headers on a revalidated response?
*
* If so, we can now decide what to serve to the client. This is done in
* the same way as with a regular response, but conditions are now checked
* against the cached or merged response headers.
*/
if (cache->stale_handle) {
int status;
/* Restore the original request headers and see if we need to
* return anything other than the cached response (i.e. the original
* request was conditional).
*/
status = ap_meets_conditions(r);
}
else {
}
/* Before returning we need to handle the possible case of an
* unwritable cache. Rather than leaving the entity in the cache
* and having it constantly re-validated, now that we have recalled
* the body it is safe to try and remove the url from the cache.
*/
if (rv != APR_SUCCESS) {
"cache: updating headers with store_headers failed. "
"Removing cached url.");
/* Probably a mod_disk_cache cache area has been (re)mounted
* read-only, or there is a permissions problem.
*/
"cache: attempt to remove url from cache unsuccessful.");
}
}
}
if(rv != APR_SUCCESS) {
"cache: store_headers failed");
}
}
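#if 0
/* Illustrative sketch of the "write the headers away, then cope with an
 * unwritable cache" step described inside the CACHE_SAVE filter above,
 * assuming the store_headers() member of the provider vtable declared in
 * mod_cache.h. This is a sketch, not the code elided from this listing.
 */
static apr_status_t sketch_store_headers(cache_request_rec *cache,
                                         request_rec *r, cache_info *info)
{
    apr_status_t rv = cache->provider->store_headers(cache->handle, r, info);
    if (rv != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
                     "cache: store_headers failed for %s", r->unparsed_uri);
    }
    return rv;
}
#endif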
/*
* CACHE_REMOVE_URL filter
* ---------------
*
* This filter gets added in the quick handler every time the CACHE_SAVE filter
* gets inserted. Its purpose is to remove a confirmed stale cache entry from
* the cache.
*
* CACHE_REMOVE_URL has to be a protocol filter to ensure that it is run even if
* the response is a canned error message, which removes the content filters
* and thus the CACHE_SAVE filter from the chain.
*
* CACHE_REMOVE_URL expects cache request rec within its context because the
* request this filter runs on can be different from the one whose cache entry
* should be removed, due to internal redirects.
*
* Note that the CACHE_SAVE filter (as a content-set filter, hence run before the
* protocol filters) will remove this filter if it decides to cache the file.
* Therefore, if this filter is left in, it must mean we need to toss any
* existing files.
*/
{
request_rec *r = f->r;
/* Setup cache_request_rec */
if (!cache) {
/* user likely configured CACHE_REMOVE_URL manually; they should really
* use mod_cache configuration to do that. So:
* 1. Remove ourselves
* 2. Do nothing and bail out
*/
"cache: CACHE_REMOVE_URL enabled unexpectedly");
}
/* Now remove this cache entry from the cache */
/* remove ourselves */
}
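#if 0
/* Illustrative sketch of how the filter above recovers its cache_request_rec:
 * it reads f->ctx rather than r->request_config because, as noted in the
 * comments, the request it runs on may not be the one whose entry must be
 * removed. Sketch only, not the original code.
 */
static void sketch_remove_url_ctx(ap_filter_t *f)
{
    cache_request_rec *cache = (cache_request_rec *) f->ctx;
    if (!cache) {
        /* misconfigured: take ourselves out of the chain and do nothing */
        ap_remove_output_filter(f);
    }
}
#endif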
/* -------------------------------------------------------------- */
/* Setup configurable data */
{
/* array of URL prefixes for which caching is enabled */
/* array of URL prefixes for which caching is disabled */
/* maximum time to cache a document */
/* default time to cache a document */
/* factor used to estimate Expires date from LastModified date */
ps->factor_set = 0;
ps->no_last_mod_ignore_set = 0;
ps->no_last_mod_ignore = 0;
ps->ignorecachecontrol = 0;
ps->ignorecachecontrol_set = 0;
ps->store_private = 0;
ps->store_private_set = 0;
ps->store_nostore = 0;
ps->store_nostore_set = 0;
/* array of headers that should not be stored in cache */
return ps;
}
{
/* array of URL prefixes for which caching is disabled */
/* array of URL prefixes for which caching is enabled */
/* maximum time to cache a document */
/* default time to cache a document */
/* factor used to estimate Expires date from LastModified date */
(overrides->no_last_mod_ignore_set == 0)
(overrides->ignorecachecontrol_set == 0)
ps->store_private =
(overrides->store_private_set == 0)
ps->store_nostore =
(overrides->store_nostore_set == 0)
ps->ignore_headers =
return ps;
}
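#if 0
/* Illustrative sketch of the "*_set" merge idiom used in merge_cache_config()
 * above: an override only wins when the child config actually set the value.
 * The field shown (ignorecachecontrol) exists in cache_server_conf; the
 * helper itself is not part of the module.
 */
static void sketch_merge_flag(cache_server_conf *ps,
                              cache_server_conf *base,
                              cache_server_conf *overrides)
{
    ps->ignorecachecontrol =
        (overrides->ignorecachecontrol_set == 0)
            ? base->ignorecachecontrol
            : overrides->ignorecachecontrol;
    ps->ignorecachecontrol_set =
        overrides->ignorecachecontrol_set || base->ignorecachecontrol_set;
}
#endif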
int flag)
{
conf =
&cache_module);
return NULL;
}
{
conf =
&cache_module);
return NULL;
}
int flag)
{
conf =
&cache_module);
return NULL;
}
int flag)
{
conf =
&cache_module);
return NULL;
}
const char *header)
{
char **new;
conf =
&cache_module);
/* if the header 'None' is listed, clear the array */
}
else {
/* Only add header if no "None" has been found in header list
* so far.
* (When 'None' is passed, IGNORE_HEADERS_SET && nelts == 0.)
*/
}
}
return NULL;
}
const char *type,
const char *url)
{
struct cache_enable *new;
if (*type == '/') {
"provider (%s) starts with a '/'. Are url and provider switched?",
type);
}
conf =
&cache_module);
return NULL;
}
} else {
}
return NULL;
}
const char *url)
{
struct cache_disable *new;
conf =
&cache_module);
return NULL;
}
} else {
}
return NULL;
}
const char *arg)
{
conf =
&cache_module);
return NULL;
}
const char *arg)
{
conf =
&cache_module);
return NULL;
}
const char *arg)
{
conf =
&cache_module);
return NULL;
}
const char *arg)
{
double val;
conf =
&cache_module);
return "CacheLastModifiedFactor value must be a float";
}
return NULL;
}
{
/* This is the means by which unusual (non-unix) os's may supply an
* alternate cache key generation function (an optional function looked up
* at post-config time); the default is used below when none is provided.
*/
if (!cache_generate_key) {
}
return OK;
}
static const command_rec cache_cmds[] =
{
/* XXX
* Consider a new config directive that enables loading specific cache
* implementations (like mod_cache_mem, mod_cache_file, etc.).
* Rather than using a LoadModule directive, an admin would use something
* like CacheModule mem_cache_module | file_cache_module, etc.,
* which would cause the appropriate cache module to be loaded.
* This is more intuitive than requiring a LoadModule directive.
*/
"A cache type and partial URL prefix below which "
"caching is enabled"),
"A partial URL prefix below which caching is disabled"),
"The maximum time in seconds to cache a document"),
"The minimum time in seconds to cache a document"),
"The default time in seconds to cache a document"),
"Ignore Responses where there is no Last Modified Header"),
"Ignore requests from the client for uncached content"),
"Ignore 'Cache-Control: private' and store private content"),
"Ignore 'Cache-Control: no-store' and store sensitive content"),
"A space separated list of headers that should not be "
"stored by the cache"),
"The factor used to estimate Expires date from "
"LastModified date"),
{NULL}
};
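#if 0
/* Illustrative sketch of how one entry of the table above is normally spelled
 * out in full. The handler name and the conf field names (maxex, maxex_set)
 * are assumptions for illustration, not necessarily those used by this
 * module.
 */
static const char *sketch_set_maxexpire(cmd_parms *parms, void *dummy,
                                        const char *arg)
{
    cache_server_conf *conf =
        ap_get_module_config(parms->server->module_config, &cache_module);
    conf->maxex = apr_time_from_sec(apr_atoi64(arg));
    conf->maxex_set = 1;
    return NULL;
}

static const command_rec sketch_cmds[] =
{
    AP_INIT_TAKE1("CacheMaxExpire", sketch_set_maxexpire, NULL, RSRC_CONF,
                  "The maximum time in seconds to cache a document"),
    {NULL}
};
#endif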
static void register_hooks(apr_pool_t *p)
{
/* cache initializer */
/* cache handler */
/* cache filters
* XXX The cache filters need to run right after the handlers and before
* any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
*
* Depending on the type of request (subrequest / main request) they
* need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
* filters. Thus create two filter handles for each type:
* cache_save_filter_handle / cache_out_filter_handle to be used by
* main requests and
* cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
* to be used by subrequests
*/
/*
* CACHE_SAVE must go into the filter chain after a possible DEFLATE
* filter to ensure that the compressed content is stored.
* Incrementing the filter type by 1 ensures this happens.
*/
ap_register_output_filter("CACHE_SAVE",
NULL,
/*
* CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
* handle subrequests. Decrementing the filter type by 1 ensures this
* happens.
*/
ap_register_output_filter("CACHE_SAVE_SUBREQ",
NULL,
/*
* CACHE_OUT must go into the filter chain after a possible DEFLATE
* filter to ensure that already compressed cache objects do not
* get compressed again. Incrementing the filter type by 1 ensures
* this happens.
*/
ap_register_output_filter("CACHE_OUT",
NULL,
/*
* CACHE_OUT_SUBREQ must go into the filter chain before SUBREQ_CORE to
* handle subrequests. Decrementing the filter type by 1 ensures this
* happens.
*/
ap_register_output_filter("CACHE_OUT_SUBREQ",
NULL,
/* CACHE_REMOVE_URL has to be a protocol filter to ensure that it is
* run even if the response is a canned error message, which
* removes the content filters.
*/
ap_register_output_filter("CACHE_REMOVE_URL",
NULL,
}
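#if 0
/* Illustrative sketch of the registration pattern described in the comments
 * of register_hooks() above: keep the returned handle in the file-scope
 * static so the quick handler can add the filter without a name lookup, and
 * nudge the filter type around AP_FTYPE_CONTENT_SET as explained. The
 * callback name cache_save_filter is assumed here; the real registration
 * arguments are elided from this listing.
 */
static void sketch_register_save_filter(void)
{
    cache_save_filter_handle =
        ap_register_output_filter("CACHE_SAVE",
                                  cache_save_filter,
                                  NULL,
                                  AP_FTYPE_CONTENT_SET + 1);
}
#endif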
{
NULL, /* create per-directory config structure */
NULL, /* merge per-directory config structures */
create_cache_config, /* create per-server config structure */
merge_cache_config, /* merge per-server config structures */
cache_cmds, /* command apr_table_t */
};