mod_disk_cache.c revision dfc11e2ca51e32163317f52344553069431d9da1
* limitations under the License. * mod_disk_cache: Disk Based HTTP 1.1 Cache. * Flow to Find the .data file: * Read in <hash>.header file (may contain Format #1 or Format #2) * If format #1 (Contains a list of Vary Headers): * Use each header name (from .header) with our request values (headers_in) to * regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz * re-read in <hash>.header (must be format #2) * apr_array_t vary_headers (delimited by CRLF) * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format) * entity name (dobj->name) [length is in disk_cache_info_t->name_len] * r->headers_out (delimited by CRLF) * r->headers_in (delimited by CRLF) /* Forward declarations */ /* htcacheclean may remove directories underneath us. * So, we'll try renaming three times at a cost of 0.002 seconds. /* 1000 micro-seconds aka 0.001 seconds. */ /* This assumes that the tempfiles are on the same file system * as the cache_root. If not, then we need a file copy/move "disk_cache: rename tempfile to file failed:" /* clean up the temporary file */ /* These two functions get and put state information into the data * file for an ap_cache_el, this state information will be read * and written transparently to clients of this module /* read the data from the cache file */ /* Store it away so we can get it later. */ /* Note that we could optimize this by conditionally doing the palloc * depending upon the size. */ /* check that we have the same URL */ /* Would strncmp be correct? */ * - Handle multiple-value headers better. (sort them?) * - Handle Case in-sensitive Values better. * This isn't the end of the world, since it just lowers the cache * hit rate, but it would be nice to fix. * The majority are case insensitive if they are values (encoding etc). * Most of rfc2616 is case insensitive on header contents. * So the better solution may be to identify headers which should be * treated case-sensitive? 
* HTTP URI's (3.2.3) [host and scheme are insensitive] * HTTP-date values (3.3.1) * 3.7 Media Types [excerpt] * The type, subtype, and parameter attribute names are case- * insensitive. Parameter values might or might not be case-sensitive, * depending on the semantics of the parameter name. * Comparison of expectation values is case-insensitive for unquoted * tokens (including the 100-continue token), and is case-sensitive for * quoted-string expectation-extensions. /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */ * Hook and mod_cache callback functions /* we don't support caching of range requests (yet) */ "disk_cache: URL %s partial content response not cached",
/* Note, len is -1 if unknown so don't trust it too hard */ "disk_cache: URL %s failed the size check " "disk_cache: URL %s failed the size check " /* Allocate and initialize cache_object_t and disk_cache_object_t */ /* Save the cache root */ /* Look up entity keyed to 'url' */ "disk_cache: Cannot cache files to disk without a CacheRoot specified.");
/* Create and init the cache object */ /* Open the headers file */ /* Save the cache root */ /* read the format from the cache file */ "disk_cache: Cannot parse vary header file: %s",
"disk_cache: File '%s' has a version mismatch. File had version: %d.",
/* oops, not vary as it turns out */ /* This wasn't a Vary Format file, so we must seek to the * start of the file again, so that later reads work. /* Read the bytes to setup the cache_info fields */ /* Is this a cached HEAD request? */ "disk_cache: HEAD request cached, non-HEAD requested, ignoring: %s",
/* When we are in the quick handler we don't have the per-directory * configuration, so this check only takes the global setting of * the EnableSendFile directive into account. /* Atomic check - does the body file belong to the header file? */ /* Initialize the cache_handle callback functions */ "disk_cache: Recalled cached URL info header %s",
dobj->
name);
/* Oh dear, no luck matching header to the body */ "disk_cache: Cached URL info header '%s' didn't match body, ignoring this entry",
/* Null out the cache object pointer so next time we start from scratch */ /* Get disk cache object from cache handle */ /* Delete headers file */ /* Will only result in an output if httpd is started with -e debug. * For reason see log_error_core for the case s == NULL. "disk_cache: Failed to delete headers file %s from cache.",
/* Will only result in an output if httpd is started with -e debug. * For reason see log_error_core for the case s == NULL. "disk_cache: Failed to delete data file %s from cache.",
/* now delete directories as far as possible up to our cache root */ * now walk our way back to the cache root, delete everything * in the way as far as possible * Note: due to the way we constructed the file names in * header_file and data_file, we are guaranteed that the * cache_root is suffixed by at least one '/' which will be * turned into a terminating null by this loop. Therefore, * we won't either delete or go above our cache root. "disk_cache: Deleting directory %s from cache",
"Premature end of vary array.");
if (p > 0 && w[p -
1] ==
'\n') {
if (p >
1 && w[p -
2] ==
CR) {
/* If we've finished reading the array, break out of the loop. */ /* ### What about APR_EOF? */ "Premature end of cache headers.");
/* Delete terminal (CR?)LF */ /* Indeed, the host's '\n': '\012' for UNIX; '\015' for MacOS; '\025' for OS/390 -- whatever the script generates. if (p > 0 && w[p -
1] ==
'\n') {
if (p >
1 && w[p -
2] ==
CR) {
/* If we've finished reading the headers, break out of the loop. */ /* Chances are that we received an ASCII header text instead of * the expected EBCDIC header lines. Try to auto-detect: for (
cp = w; *
cp !=
'\0'; ++
cp) {
"CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
#
endif /*APR_CHARSET_EBCDIC*/ /* if we see a bogus header don't ignore it. Shout and scream */ * Reads headers from a buffer and returns an array of headers. * Returns NULL on file error * This routine tries to deal with too long lines and continuation lines. * @@@: XXX: FIXME: currently the headers are passed thru un-merged. * Is that okay, or should they be collapsed where possible? /* This case should not happen... */ "disk_cache: recalling headers; but no header fd for %s",
dobj->
name);
/* Call routine to read the header lines/status line */ "disk_cache: Recalled headers for URL %s",
dobj->
name);
/* If we were initially opened as a vary format, rollback * that internal state for the moment so we can recreate the * vary format hints in the appropriate directory. "disk_cache: could not create temp file %s",
"disk_cache: could not create temp file %s",
"disk_cache: could not write info to header file %s",
"disk_cache: could not write out-headers to header file %s",
/* Parse the vary header and dump those fields from the headers_in. */ /* FIXME: Make call to the same thing cache_select calls to crack Vary. */ "disk_cache: could not write in-headers to header file %s",
/* are we done completely? if so, pass any trailing buckets right through */ /* have we seen eos yet? */ /* honour flush buckets, we'll get called again */ /* metadata buckets are preserved as is */ /* read the bucket, write to the cache */ "disk_cache: Error when reading bucket for URL %s",
/* Remove the intermediate cache file and return non-APR_SUCCESS */ /* don't write empty buckets to the cache */ /* Attempt to create the data file at the last possible moment, if * the body is empty, we don't write a file at all, and save an inode. /* write to the cache, leave if we fail */ "disk_cache: Error when writing cache file for URL %s",
/* Remove the intermediate cache file and return non-APR_SUCCESS */ "disk_cache: URL %s failed the size check " /* Remove the intermediate cache file and return non-APR_SUCCESS */ /* have we reached the limit of how much we're prepared to write in one * go? If so, leave, we'll get called again. This prevents us from trying * to swallow too much data at once, or taking so long to write the data /* Was this the final bucket? If yes, close the temp file and perform "disk_cache: Discarding body for URL %s " "because connection has been aborted.",
/* Remove the intermediate cache file and return non-APR_SUCCESS */ "disk_cache: URL %s failed the size check " /* Remove the intermediate cache file and return non-APR_SUCCESS */ "disk_cache: URL %s didn't receive complete response, not caching",
/* Remove the intermediate cache file and return non-APR_SUCCESS */ /* All checks were fine, we're good to go when the commit comes */ /* write the headers to disk at the last possible moment */ /* move header and data tempfiles to the final destination */ /* remove the cached items completely on any failure */ "disk_cache: commit_entity: URL '%s' not cached due to earlier disk error.",
"disk_cache: commit_entity: Headers and body for URL %s cached.",
/* XXX: Set default values */ * mod_disk_cache configuration directives handlers. /* TODO: canonicalize cache_root and strip off any trailing slashes */ * Consider eliminating the next two directives in favor of * Ian's prime number hash... * filename = "/key % prime1 /key %prime2/key %prime3" return "CacheDirLevels value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return "CacheDirLength value must be an integer greater than 0";
return "CacheDirLevels*CacheDirLength value must not be higher than 20";
return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
return "CacheReadSize argument must be a non-negative integer representing the max amount of data to cache in go.";
return "CacheReadTime argument must be a non-negative integer representing the max amount of time taken to cache in go.";
"The directory to store cache files"),
"The number of levels of subdirectories in the cache"),
"The number of characters in subdirectory names"),
"The minimum file size to cache a document"),
"The maximum file size to cache a document"),
"The maximum quantity of data to attempt to read and cache in one go"),
"The maximum time taken to attempt to read and cache in go"),
NULL,
/* merge per-server config structures */