mod_cache.c revision 9c5109a7051450f93d24e64eced0bdc6a29d8417
/* Copyright 2000-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define CORE_PRIVATE
#include "mod_cache.h"
/* -------------------------------------------------------------- */
/* Handles for cache filters, resolved at startup to eliminate
* a name-to-function mapping on each request
*/
/* handle for the CACHE_SAVE filter */
static ap_filter_rec_t *cache_save_filter_handle;
/* handle for the CACHE_OUT filter */
static ap_filter_rec_t *cache_out_filter_handle;
/*
* CACHE handler
* -------------
*
* Can we deliver this request from the cache?
* If yes:
* deliver the content by installing the CACHE_OUT filter.
* If no:
* check whether we're allowed to try to cache it
* If yes:
* add CACHE_SAVE filter
* If No:
* oh well.
*/
{
/* Decides whether this request can be answered from the cache. Every
* path that cannot use the cache returns DECLINED; a cache hit returns
* either the status from ap_meets_conditions() or OK.
*/
const char *pragma, *auth;
char *url;
char *path;
/* Delay initialization until we know we are handling a GET */
if (r->method_number != M_GET) {
return DECLINED;
}
uri = r->parsed_uri;
/* the raw request URI is what the log messages below report */
url = r->unparsed_uri;
&cache_module);
/*
* Which cache module (if any) should handle this request?
*/
return DECLINED;
}
/* make space for the per request config */
&cache_module);
if (!cache) {
}
/* save away the possible providers */
/*
* Are we allowed to serve cached info at all?
*/
/* find certain cache controlling headers */
/* first things first - does the request allow us to return
* cached information at all? If not, just decline the request.
*
* Note that there is a big difference between not being allowed
* to cache a request (no-store) and not being allowed to return
* a cached request without revalidation (max-age=0).
*
* Caching is forbidden under the following circumstances:
*
* - RFC2616 14.9.2 Cache-Control: no-store
* - Pragma: no-cache
* - Any requests requiring authorization.
*/
/* the client's request for an uncached copy is deliberately ignored on
* this path; only a message is logged
*/
"incoming request is asking for a uncached version of "
"%s, but we know better and are ignoring it", url);
}
else {
"cache: no-cache or authorization forbids caching "
"of %s", url);
return DECLINED;
}
}
/*
* Try to serve this request from the cache.
*
* If no existing cache file (DECLINED)
* add cache_save filter
* If cached file (OK)
* clear filter stack
* add cache_out filter
* return OK
*/
/* the save filter is only added when this is not a lookup-only call */
if (!lookup) {
/* add cache_save filter to cache this request */
r->connection);
}
}
else {
/* error */
"cache: error returned while checking for cached "
}
return DECLINED;
}
/* We have located a suitable cache file now. */
}
/* presumably evaluates the request's conditional headers against the
* cached response - confirm against the ap_meets_conditions() docs
*/
rv = ap_meets_conditions(r);
/* Return cached status. */
return rv;
}
/* If we're a lookup, we can exit now instead of serving the content. */
if (lookup) {
return OK;
}
/* Serve up the content */
/* We are in the quick handler hook, which means that no output
* filters have been set. So let's run the insert_filter hook.
*/
r, r->connection);
/* kick off the filter stack */
/* a failure while sending the cached data is logged and its status is
* returned to the caller unchanged
*/
if (rv != APR_SUCCESS) {
"cache: error returned while trying to return %s "
"cached data",
return rv;
}
return OK;
}
/*
* CACHE_OUT filter
* ----------------
*
* Deliver cached content (headers and body) up the stack.
*/
{
/* CACHE_OUT filter body; works on the request that owns filter f */
request_rec *r = f->r;
&cache_module);
/* without per-request cache state this filter was not set up by the
* cache handler
*/
if (!cache) {
/* user likely configured CACHE_OUT manually; they should use mod_cache
* configuration to do that */
"CACHE_OUT enabled unexpectedly");
}
"cache: running CACHE_OUT filter");
/* recall_body() was called in cache_select_url() */
/* This filter is done once it has served up its content */
"cache: serving %s", r->uri);
}
/*
* CACHE_SAVE filter
* -----------------
*
* Decide whether or not this content should be cached.
* If we decide no, it should:
* remove the filter from the chain
* If we decide yes, it should:
* pass the data to the storage manager
* pass the data to the next filter (the network)
*
*/
{
/* CACHE_SAVE filter body; works on the request that owns filter f */
int rv;
/* set to 1 while the Date header is read further down, and tested again
* before a generated Date header is added
*/
int date_in_errhdr = 0;
request_rec *r = f->r;
/* raw request URI; passed to cache_remove_url() and to log messages */
char *url = r->unparsed_uri;
/* explanation of why the response must not be cached; assigned by the
* tests below and checked with if (reason) once they have all run
*/
char *reason;
apr_pool_t *p;
/* check first whether running this filter has any point or not */
/* If the user has Cache-Control: no-store from RFC 2616, don't store! */
}
/* Setup cache_request_rec */
&cache_module);
if (!cache) {
/* user likely configured CACHE_SAVE manually; they should really use
* mod_cache configuration to do that
*/
}
p = r->pool;
/*
* Pass Data to Cache
* ------------------
* This section passes the brigades into the cache modules, but only
* if the setup section (see below) is complete.
*/
/* have we already run the cachability check and set up the
* cached file handle?
*/
if (cache->in_checked) {
/* pass the brigades into the cache, then pass them
* up the filter stack
*/
if (rv != APR_SUCCESS) {
}
}
/*
* Setup Data in Cache
* -------------------
* This section opens the cache entity and sets various caching
* parameters, and decides whether this URL should be cached at
* all. This section is run before the above section.
*/
/* read expiry date; if a bad date, then leave it so the client can
* read it
*/
}
}
}
else {
/* APR_DATE_BAD marks the expiry date as unusable; it is tested again
* further down, where an expiry date is estimated instead
*/
exp = APR_DATE_BAD;
}
/* read the last-modified date; if the date is bad, then delete it */
}
}
}
else {
}
/* read the etag and cache-control from the entity */
}
}
/*
* what responses should we not cache?
*
* At this point we decide based on the response headers whether it
* is appropriate _NOT_ to cache the data from the server. There are
* a whole lot of conditions that prevent us from caching this data.
* They are tested here one by one to be clear and unambiguous.
*/
&& r->status != HTTP_MULTIPLE_CHOICES
&& r->status != HTTP_MOVED_PERMANENTLY
&& r->status != HTTP_NOT_MODIFIED) {
/* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
* We don't cache 206, because we don't (yet) cache partial responses.
* We include 304 Not Modified here too as this is the origin server
* telling us to serve the cached copy.
*/
}
/* if a broken Expires header is present, don't cache it */
}
/* if query string present but no expiration time, don't cache it
* (RFC 2616/13.9)
*/
reason = "Query string present but no expires header";
}
/* if the server said 304 Not Modified but we have no cache
* file - pass this untouched to the user agent, it's not for us.
*/
reason = "HTTP Status 304 Not Modified";
}
/* 200 OK response from HTTP/1.0 and up without Last-Modified,
* Etag, or Expires headers.
*/
/* Note: mod-include clears last_modified/expires/etags - this
* is why we have an optional function for a key-gen ;-)
*/
reason = "No Last-Modified, Etag, or Expires headers";
}
else if (r->header_only) {
/* HEAD requests */
reason = "HTTP HEAD request";
}
/* RFC2616 14.9.2 Cache-Control: no-store response
* indicating do not cache, or stop now if you are
* trying to cache it */
reason = "Cache-Control: no-store present";
}
/* RFC2616 14.9.1 Cache-Control: private
* this object is marked for this user's eyes only. Behave
* as a tunnel.
*/
reason = "Cache-Control: private present";
}
/* RFC2616 14.8 Authorisation:
* if authorisation is included in the request, we don't cache,
* but we can cache if the following exceptions are true:
* 1) If Cache-Control: s-maxage is included
* 2) If Cache-Control: must-revalidate is included
* 3) If Cache-Control: public is included
*/
reason = "Authorization required";
}
else if (r->no_cache) {
/* or we've been asked not to cache it above */
reason = "no_cache present";
}
/* one of the tests above found a reason not to cache this response */
if (reason) {
/* remove this object from the cache
* BillS Asks.. Why do we need to make this call to remove_url?
* leave it in for now..
*/
cache_remove_url(r, url);
/* remove this filter from the chain */
/* ship the data up the stack */
}
/* Set the content length if known.
*/
}
if (cl) {
char *errp;
}
}
if (!cl) {
/* if we don't get the content-length, see if we have all the
* buckets and use their length to calculate the size
*/
apr_bucket *e;
int all_buckets_here=0;
int unresolved_length = 0;
size=0;
/* walk the brigade from its first bucket to the sentinel */
for (e = APR_BRIGADE_FIRST(in);
e != APR_BRIGADE_SENTINEL(in);
e = APR_BUCKET_NEXT(e))
{
/* an EOS bucket ends the scan */
if (APR_BUCKET_IS_EOS(e)) {
break;
}
/* a FLUSH bucket is noted in unresolved_length and then skipped */
if (APR_BUCKET_IS_FLUSH(e)) {
unresolved_length = 1;
continue;
}
break;
}
}
/* the brigade did not hold the whole response: size becomes -1,
* presumably meaning "length unknown" - confirm with the storage code
*/
if (!all_buckets_here) {
size = -1;
}
}
/* It's safe to cache the response.
*
* There are two possibilities at this point:
* - cache->handle == NULL. In this case there is no previously
* cached entity anywhere on the system. We must create a brand
* new entity and store the response in it.
* - cache->handle != NULL. In this case there is a stale
* entity in the system which needs to be replaced by new
* content (unless the result was 304 Not Modified, which means
* the cached entity is actually fresh, and we should update
* the headers).
*/
/* no cache handle, create a new entity */
}
/* pre-existing cache handle and 304, make entity fresh */
else if (r->status == HTTP_NOT_MODIFIED) {
/* update headers: TODO */
/* remove this filter ??? */
/* XXX is this right? we must set rv to something other than OK
* in this path
*/
}
/* pre-existing cache handle and new entity, replace entity
* with this one
*/
else {
}
/* Caching layer declined the opportunity to cache the response */
}
"cache: Caching url: %s", url);
/*
* We now want to update the cache file header information with
* the new date, last modified, expire and content length and write
* it away to our cache file. First, we determine these values from
* the response, using heuristics if appropriate.
*
* In addition, we make HTTP/1.1 age calculations and write them away
* too.
*/
/* Read the date. Generate one if one is not supplied */
date_in_errhdr = 1;
}
else {
}
}
else {
}
now = apr_time_now();
char *dates;
/* no date header (or bad header)! */
/* add one; N.B. use the time _now_ rather than when we were checking
* the cache
*/
if (date_in_errhdr == 1) {
}
"cache: Added date header");
}
else {
}
/* set response_time for HTTP/1.1 age calculations */
/* get the request time */
/* check last-modified date */
/* if it's in the future, then replace by date */
r->server,
"cache: Last modified is in the future, "
"replacing with now");
}
/* if no expiry date then
* if lastmod
* expiry date = date + min((date - lastmod) * factor, maxexpire)
* else
* expire date = date + defaultexpire
*/
if (exp == APR_DATE_BAD) {
/* if lastmod == date then you get 0*conf->factor which results in
* an expiration time of now. This causes some problems with
* freshness calculations, so we choose the else path...
*/
}
}
else {
}
}
/*
* Write away header information to cache.
*/
if (rv == APR_SUCCESS) {
}
if (rv != APR_SUCCESS) {
}
}
/* -------------------------------------------------------------- */
/* Setup configurable data */
{
/* Fills in a configuration record ps and returns it. */
/* array of URL prefixes for which caching is enabled */
/* array of URL prefixes for which caching is disabled */
/* maximum time to cache a document */
/* default time to cache a document */
/* factor used to estimate Expires date from LastModified date */
/* the *_set flags start at 0 - presumably "not set by any directive
* yet"; confirm against the directive handlers
*/
ps->factor_set = 0;
/* default percentage to force cache completion */
ps->complete_set = 0;
ps->no_last_mod_ignore_set = 0;
ps->no_last_mod_ignore = 0;
ps->ignorecachecontrol = 0;
ps->ignorecachecontrol_set = 0 ;
return ps;
}
{
/* Builds a combined configuration record ps and returns it. */
/* array of URL prefixes for which caching is disabled */
/* array of URL prefixes for which caching is enabled */
/* maximum time to cache a document */
/* default time to cache a document */
/* factor used to estimate Expires date from LastModified date */
/* default percentage to force cache completion */
/* the *_set flags of overrides are tested when these two settings are
* merged
*/
(overrides->no_last_mod_ignore_set == 0)
(overrides->ignorecachecontrol_set == 0)
return ps;
}
int flag)
{
/* takes an on/off style flag argument; the only return is NULL,
* presumably meaning "no error message" - confirm which directive
* this serves
*/
conf =
&cache_module);
return NULL;
}
{
/* the only return is NULL, presumably meaning "no error message" -
* confirm which directive this serves
*/
conf =
&cache_module);
return NULL;
}
const char *type,
const char *url)
{
/* takes a type and a URL and works with a struct cache_enable entry;
* presumably the handler for enabling caching below a URL prefix -
* confirm
*/
struct cache_enable *new;
conf =
&cache_module);
return NULL;
}
const char *url)
{
/* takes only a URL; presumably the handler for disabling caching below
* a URL prefix - confirm. NOTE(review): the entry is declared as
* struct cache_enable here as well.
*/
struct cache_enable *new;
conf =
&cache_module);
return NULL;
}
const char *arg)
{
/* takes one string argument; the only return is NULL, presumably
* meaning "no error message" - confirm which directive this serves
*/
conf =
&cache_module);
return NULL;
}
const char *arg)
{
/* takes one string argument; the only return is NULL, presumably
* meaning "no error message" - confirm which directive this serves
*/
conf =
&cache_module);
return NULL;
}
const char *arg)
{
/* Handler for the CacheLastModifiedFactor directive (per the error
* text below). Returns an error string for a bad value and NULL
* otherwise.
*/
/* presumably receives the value parsed from arg - confirm */
double val;
conf =
&cache_module);
return "CacheLastModifiedFactor value must be a float";
}
return NULL;
}
const char *arg)
{
/* Handler for the CacheForceCompletion directive (per the error text
* below). Returns an error string for a bad value and NULL otherwise.
*/
/* presumably receives the percentage parsed from arg - confirm */
int val;
conf =
&cache_module);
return "CacheForceCompletion value must be a percentage";
}
return NULL;
}
{
/* This is the means by which unusual (non-unix) os's may find alternate
*/
/* handles the case where no cache_generate_key function is set -
* presumably by installing a default; confirm
*/
if (!cache_generate_key) {
}
/* every visible path reports OK */
return OK;
}
/* Table of the module's configuration directives. Each entry ends with
* the directive's help text, and the {NULL} entry closes the table.
*/
static const command_rec cache_cmds[] =
{
/* XXX
* Consider a new config directive that enables loading specific cache
* implementations (like mod_cache_mem, mod_cache_file, etc.).
* Rather than using a LoadModule directive, admin would use something
* like CacheModule mem_cache_module | file_cache_module, etc,
* which would cause the appropriate cache module to be loaded.
* This is more intuitive than requiring a LoadModule directive.
*/
"A cache type and partial URL prefix below which "
"caching is enabled"),
"A partial URL prefix below which caching is disabled"),
"The maximum time in seconds to cache a document"),
"The default time in seconds to cache a document"),
"Ignore Responses where there is no Last Modified Header"),
NULL,
"Ignore requests from the client for uncached content"),
"The factor used to estimate Expires date from "
"LastModified date"),
"Percentage of download to arrive for the cache to force "
"complete transfer"),
{NULL}
};
/* Registers the module's hooks and its two output filters, CACHE_SAVE
* and CACHE_OUT.
*/
static void register_hooks(apr_pool_t *p)
{
/* cache initializer */
/* cache handler */
/* cache filters
* XXX The cache filters need to run right after the handlers and before
* any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
* Make them AP_FTYPE_CONTENT for now.
* XXX ianhH: they should run AFTER all the other content filters.
*/
ap_register_output_filter("CACHE_SAVE",
NULL,
/* CACHE_OUT must go into the filter chain before SUBREQ_CORE to
* handle subrequests. Decrementing filter type by 1 ensures this
* happens.
*/
ap_register_output_filter("CACHE_OUT",
NULL,
}
{
NULL, /* create per-directory config structure */
NULL, /* merge per-directory config structures */
create_cache_config, /* create per-server config structure */
merge_cache_config, /* merge per-server config structures */
cache_cmds, /* command_rec table of configuration directives */
};