htcacheclean.c revision 9fe74ffcdea85800f04a7222f716f78ae60cce51
/* Copyright 2001-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* htcacheclean.c: simple program for cleaning of
* the disk cache of the Apache HTTP server
*
* Contributed by Andreas Steinmetz <ast@domdv.de>
* 8 Oct 2004
*/
#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_thread_proc.h"
#include "apr_signal.h"
#include "apr_getopt.h"
#include "apr_ring.h"
#include "apr_date.h"
#include <signal.h>
#include <unistd.h>
#endif
#include <stdlib.h>
#endif
/* mod_disk_cache.c extract start */
/* Format version of the header struct stored on-disk (see the `format`
 * member below); where it is compared is not visible in this view. */
#define DISK_FORMAT_VERSION 0
/*
 * Header record stored on-disk for a cache entry.
 * NOTE(review): the members announced by the last three comments and the
 * closing "} name;" of this typedef are not present in this view.
 * Presumably this is the disk_cache_info_t whose sizeof is taken when a
 * headers file is read further down - confirm the definition is complete.
 */
typedef struct {
/* Indicates the format of the header struct stored on-disk. */
int format;
/* The HTTP status code returned for this response. */
int status;
/* The size of the entity name that follows. */
/* The number of times we've cached this entity. */
/* Miscellaneous time values. */
/* File name suffixes that distinguish the headers file and the data file
 * of a cache file set. */
#define CACHE_HEADER_SUFFIX ".header"
#define CACHE_DATA_SUFFIX ".data"
/* mod_disk_cache.c extract end */
/* mod_disk_cache.c related definitions start */
/*
* this is based on #define AP_TEMPFILE "/aptmpXXXXXX"
*
* the above definition could be reworked into the following:
*
* #define AP_TEMPFILE_PREFIX "/"
* #define AP_TEMPFILE_BASE "aptmp"
* #define AP_TEMPFILE_SUFFIX "XXXXXX"
* #define AP_TEMPFILE_BASELEN strlen(AP_TEMPFILE_BASE)
* #define AP_TEMPFILE_NAMELEN strlen(AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX)
* #define AP_TEMPFILE AP_TEMPFILE_PREFIX AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX
*
* these definitions would then match the definitions below:
*/
#define AP_TEMPFILE_BASE "aptmp"
#define AP_TEMPFILE_SUFFIX "XXXXXX"
/* mod_disk_cache.c related definitions end */
/* define the following for debugging */
/* NOTE(review): no DEBUG define or undef follows the comment above in this
 * view, although DEBUG is tested with #ifdef / #ifndef further down. */
/*
 * Note: on Linux delays <= 2ms are busy waits without
 * scheduling, so never use a delay <= 2ms below
 */
/* NOTE(review): no delay constants are visible here; DELETE_NICE is used by
 * the delete helpers but its definition is not in view - confirm. */
/* seconds per minute; the -d option handling multiplies `repeat` by this */
#define SECS_PER_MIN 60
/* divisor used by the statistics code to scale a size to the next unit */
#define KBYTE 1024
/* KBYTE * KBYTE */
#define MBYTE 1048576
/*
 * One file or file set recorded while walking a cache directory (the walk
 * allocates these with apr_pcalloc).
 * NOTE(review): the comments below describe time and size members that are
 * not declared in this view, although d->hsize and d->dsize are read by the
 * directory walk - confirm the member declarations are present.
 */
typedef struct _direntry
{
/* kind of entry; the directory walk switches on HEADERDATA, HEADER, DATA
 * and TEMP (those constants are not defined in this view) */
int type;
/* headers file modification time */
/* body file modification time */
/* headers file size */
/* body or temporary file size */
/* name of the file with its suffix cut off by the directory walk */
char *basename;
} DIRENTRY;
/*
 * One cache entry as handled by the purge code, which iterates ENTRY
 * elements on the `root` ring.
 * NOTE(review): only `basename` is declared in this view; the members the
 * other comments describe are not visible - confirm they are present.
 */
typedef struct _entry
{
/* cache entry expiration time */
/* cache entry time of last response to client */
/* headers file modification time */
/* body file modification time */
/* headers file size */
/* body or temporary file size */
/* fileset base name */
char *basename;
} ENTRY;
/* file deletion count for nice mode */
static int delcount;
/*
 * flag: true if SIGINT or SIGTERM occurred
 *
 * The flag is stored by the asynchronous signal handler and polled by the
 * long-running processing and sleep loops.  It is therefore declared
 * volatile sig_atomic_t: that is the object type ISO C guarantees may be
 * written from a signal handler, and volatile keeps the compiler from
 * caching the value across a polling loop.
 */
static volatile sig_atomic_t interrupted;
/* flag: true means user said apache is not running */
static int realclean;
/* flag: true means print statistics */
static int verbose;
/* flag: true means nice mode is activated */
static int benice;
/* flag: true means dry run, don't actually delete anything */
static int dryrun;
/* string length of the path to the proxy directory */
static int baselen;
/* start time of this processing run */
static apr_time_t now;
/* stderr file handle */
static apr_file_t *errfile;
/* file size summary for deleted unsolicited files */
static apr_off_t unsolicited;
/* ENTRY ring anchor */
/* NOTE(review): no declaration follows the comment above in this view,
 * although `root` is used as the ring anchor by the purge loops - confirm
 * the declaration is present. */
/*
 * fake delete for debug purposes
 */
#ifdef DEBUG
/* in DEBUG builds every apr_file_remove call after this point is
 * redirected to fake_file_remove */
#define apr_file_remove fake_file_remove
/* NOTE(review): the fake_file_remove function header and its statements
 * are not visible in this view; only the comment below remains. */
{
/* stat and printing to simulate some deletion system load and to
   display what would actually have happened */
}
#endif
/*
 * called on SIGINT or SIGTERM
 *
 * Records the request by setting the file-scope `interrupted` flag, which
 * the processing code tests to stop early.
 * NOTE(review): the handler's function header is not visible in this view.
 */
{
#ifdef DEBUG
/* NOTE(review): the DEBUG-only statement is not visible in this view */
#endif
/* the only visible action of the handler */
interrupted = 1;
}
/*
 * called in out of memory condition
 *
 * Terminates the process with exit status 1 on the first call; returns
 * APR_ENOMEM if it is entered again afterwards.
 * NOTE(review): the function header is not visible in this view; presumably
 * this is installed as a pool abort function - confirm.
 */
{
/* remembers whether exit() has already been requested */
static int called = 0;
/* be careful to call exit() only once */
if (!called) {
called = 1;
exit(1);
}
/* reached only when the function is entered again after exit() was called */
return APR_ENOMEM;
}
/*
 * print purge statistics
 *
 * Does nothing unless the file-scope `verbose` flag is set.
 * NOTE(review): the function header, several declarations (e.g. `ufrag`)
 * and the conditions around the unit selection are not visible in this
 * view; the comments below only describe what the remaining lines show.
 */
{
/* unit characters ('K' or 'M') printed after the respective sizes */
char ttype;
char stype;
char mtype;
char utype;
if (!verbose) {
return;
}
/* each size starts out with unit 'K' and is switched to 'M' by a
 * condition that is not visible here */
ttype = 'K';
ttype = 'M';
}
stype = 'K';
stype = 'M';
}
mtype = 'K';
mtype = 'M';
}
/* the unsolicited byte total is only scaled when it is non-zero */
if (unsolicited) {
utype = 'K';
unsolicited /= KBYTE;
/* still at least KBYTE after the first division: scale once more */
if (unsolicited >= KBYTE) {
utype = 'M';
unsolicited /= KBYTE;
}
/* whole part and fraction both zero: force the fraction to 1 so that a
 * non-zero total is not shown as zero */
if (!unsolicited && !ufrag) {
ufrag = 1;
}
}
"total size was %d.%d%c, total size now %d.%d%c\n",
}
/*
 * delete a single file
 *
 * Returns without touching anything in dry-run mode.  Otherwise removes
 * `nextpath` using a short-lived sub-pool of `pool` and, in nice mode,
 * counts the deletion.
 * NOTE(review): the function header and the statement that builds
 * `nextpath` are not visible in this view.
 */
{
char *nextpath;
apr_pool_t *p;
if (dryrun) {
return;
}
/* temp pool, otherwise lots of memory could be allocated */
apr_pool_create(&p, pool);
apr_file_remove(nextpath, p);
apr_pool_destroy(p);
if (benice) {
/* one file removed: the counter restarts once DELETE_NICE is reached.
 * NOTE(review): whatever else happens at that point (presumably a pause)
 * is not visible here. */
if (++delcount >= DELETE_NICE) {
delcount = 0;
}
}
}
/*
 * delete cache file set
 *
 * Returns without touching anything in dry-run mode.  Otherwise issues two
 * apr_file_remove calls using a short-lived sub-pool of `pool` and, in nice
 * mode, counts both deletions.
 * NOTE(review): the function header and the statements that build
 * `nextpath` before each removal are not visible in this view.
 */
{
char *nextpath;
apr_pool_t *p;
if (dryrun) {
return;
}
/* temp pool, otherwise lots of memory could be allocated */
apr_pool_create(&p, pool);
apr_file_remove(nextpath, p);
apr_file_remove(nextpath, p);
apr_pool_destroy(p);
if (benice) {
/* two removals were issued above, hence the step of 2 */
delcount += 2;
if (delcount >= DELETE_NICE) {
delcount = 0;
}
}
}
/*
 * walk the cache directory tree
 *
 * The visible return statements yield 1 on the failure and interrupt paths
 * and 0 at the end of the function.
 * NOTE(review): the function header, several declarations (e.g. `status`,
 * `anchor`, `len`, `current`) and many statements of this function are not
 * visible in this view; the comments below only describe what the
 * remaining lines show.
 */
{
apr_pool_t *p;
apr_hash_t *h;
apr_hash_index_t *i;
apr_file_t *fd;
char *nextpath;
char *base;
char *ext;
DIRENTRY *d;
DIRENTRY *t;
DIRENTRY *n;
ENTRY *e;
int skip;
int retries;
/* working pool and hash for this call, child of the caller's pool */
apr_pool_create(&p, pool);
h = apr_hash_make(p);
skip = 0;
return 1;
}
/* skip first two entries which will always be '.' and '..' */
if (skip < 2) {
skip++;
continue;
}
/* zero-initialised record for the directory entry just read */
d = apr_pcalloc(p, sizeof(DIRENTRY));
}
if (interrupted) {
return 1;
}
/* `n` is fetched before the body runs so the loop can advance with d=n */
for(d = APR_RING_FIRST(&anchor);
d=n) {
n = APR_RING_NEXT(d, link);
if (!base++) {
}
/* there may be temporary files which may be gone before
   processing, always skip these if not in realclean mode */
continue;
}
/* this may look strange but apr_stat() may return errno which
   is system dependent and there may be transient failures,
   so just blindly retry for a short while */
do
{
if (status != APR_SUCCESS) {
}
/* what may happen here is that apache did create a file which
   we did detect but then does delete the file before we can
   get file information, so if we don't get any file information
   we will ignore the file in this case */
if (status != APR_SUCCESS) {
/* ignoring the file is only allowed when not in realclean mode and not
 * interrupted; otherwise the walk fails */
if (!realclean && !interrupted) {
continue;
}
return 1;
}
return 1;
}
continue;
}
continue;
}
if (!ext) {
}
continue;
}
/* cut the suffix off so that only the base name remains */
*ext = '\0';
/* if a user manually creates a '.header' file */
if (d->basename[0] == '\0')
continue;
/* `t` is non-NULL: continue with that record instead of the new one */
if (t)
d = t;
continue;
}
/* cut the suffix off so that only the base name remains */
*ext = '\0';
/* if a user manually creates a '.data' file */
if (d->basename[0] == '\0')
continue;
if (t) {
d = t;
}
}
}
if (interrupted) {
return 1;
}
switch(d->type) {
case HEADERDATA:
== APR_SUCCESS) {
len = sizeof(disk_cache_info_t);
== APR_SUCCESS) {
break;
}
}
else {
}
}
/* we have a somehow unreadable headers file which is associated
 * with a data file. this may be caused by apache currently
 * rewriting the headers file. thus we may delete the file set
 * either in realclean mode or if the headers file modification
 * timestamp is not within a specified positive or negative offset
 * to the current time.
 */
current = apr_time_now();
/* both files of the set count towards the unsolicited byte total */
unsolicited += d->hsize;
unsolicited += d->dsize;
}
break;
/* single data and header files may be deleted either in realclean
 * mode or if their modification timestamp is not within a
 * specified positive or negative offset to the current time.
 * this handling is necessary due to possible race conditions
 * between apache and this process
 */
case HEADER:
current = apr_time_now();
unsolicited += d->hsize;
}
break;
case DATA:
current = apr_time_now();
unsolicited += d->dsize;
}
break;
/* temp files may only be deleted in realclean mode which
 * is asserted above if a tempfile is in the hash array
 */
case TEMP:
unsolicited += d->dsize;
break;
}
}
if (interrupted) {
return 1;
}
/* release everything allocated from the working pool in this call */
apr_pool_destroy(p);
if (benice) {
}
if (interrupted) {
return 1;
}
return 0;
}
/*
 * purge cache entries
 *
 * Works on the ENTRY elements linked on the `root` ring and returns early
 * whenever the `interrupted` flag is found set.
 * NOTE(review): the function header, several declarations (e.g. `sum`,
 * `entries`, `oldest`) and the loop conditions are not visible in this
 * view; the comments below only describe what the remaining lines show.
 */
{
ENTRY *e;
ENTRY *n;
sum = 0;
entries = 0;
/* first pass over the ring: count the entries */
for (e = APR_RING_FIRST(&root);
e = APR_RING_NEXT(e, link)) {
entries++;
}
return;
}
/* process all entries with a timestamp in the future, this may
 * happen if a wrong system time is corrected
 */
for (e = APR_RING_FIRST(&root);
/* remember the successor first because `e` is unlinked below */
n = APR_RING_NEXT(e, link);
entries--;
APR_RING_REMOVE(e, link);
if (!interrupted) {
}
return;
}
}
e = n;
}
if (interrupted) {
return;
}
/* process all entries which are expired */
for (e = APR_RING_FIRST(&root);
/* remember the successor first because `e` is unlinked below */
n = APR_RING_NEXT(e, link);
entries--;
APR_RING_REMOVE(e, link);
if (!interrupted)
return;
}
}
e = n;
}
if (interrupted) {
return;
}
/* process remaining entries oldest to newest, the check for an empty
 * ring actually isn't necessary except when the compiler does
 * corrupt 64bit arithmetics which happened to me once, so better safe
 * than sorry
 */
e = APR_RING_NEXT(e, link)) {
oldest = e;
}
}
entries--;
}
if (!interrupted) {
}
}
/*
 * usage info
 *
 * Emits the option summary and then terminates the process with exit
 * status 1, so callers never get control back.
 * NOTE(review): the print calls are truncated in this view; only the
 * trailing string literals of each call remain.
 */
static void usage(void)
{
"disk cache.\n");
"-lLIMIT\n");
"-LLIMIT\n");
"every INTERVAL minutes. This\n"
" option is mutually exclusive with "
"the -D, -v and -r options.\n");
"This option is mutually\n"
" exclusive with the -d option.\n");
"This option is mutually exclusive\n"
" with the -d option.\n");
"the Apache web server\n"
" is not running. This option is "
"mutually exclusive with the -d option.\n");
"in favour of other processes.\n");
"the disk cache.\n");
"size limit in KBytes.\n");
"size limit in MBytes.\n");
"was a modification\n"
" of the disk cache. This option is only "
"possible together with\n"
" the -d option.\n");
/* a usage error always ends the program */
exit(1);
}
/*
 * main
 *
 * Resets the option flags, parses the command line (any option given twice
 * leads to usage()), then runs the cleaning pass once, or repeatedly while
 * in daemon mode and not interrupted.  The visible return statements yield
 * 1 on the error and abort paths and 0 at the end.
 * NOTE(review): the function header, several declarations (e.g. `status`,
 * `repeat`, `max`, `current`, `delay`) and many statements are not visible
 * in this view; the comments below only describe what the remaining lines
 * show.
 */
{
apr_getopt_t *o;
int retries;
int isdaemon;
int limit_found;
int intelligent;
int dowork;
char opt;
const char *arg;
char *proxypath;
char *path;
/* start from a clean state: no option seen yet */
interrupted = 0;
repeat = 0;
isdaemon = 0;
dryrun = 0;
limit_found = 0;
max = 0;
verbose = 0;
realclean = 0;
benice = 0;
intelligent = 0;
return 1;
}
return 1;
}
/* option parsing loop; every case rejects a repeated option via usage() */
while (1) {
break;
else if (status == APR_SUCCESS)
switch (opt) {
case 'i':
if (intelligent)
usage();
intelligent = 1;
break;
case 'D':
if (dryrun)
usage();
dryrun = 1;
break;
case 'n':
if (benice)
usage();
benice = 1;
break;
case 'v':
if (verbose)
usage();
verbose = 1;
break;
case 'r':
if (realclean)
usage();
realclean = 1;
break;
case 'd':
if (isdaemon)
usage();
isdaemon = 1;
/* the interval is given in minutes; keep it in seconds */
repeat *= SECS_PER_MIN;
break;
/* -l and -L share limit_found, so only one of them may be given */
case 'l':
if (limit_found)
usage();
limit_found = 1;
break;
case 'L':
if (limit_found)
usage();
limit_found = 1;
break;
case 'p':
if (proxypath)
usage();
usage();
break;
}
else usage();
}
usage();
}
usage();
}
/* -i is only accepted together with -d */
if (!isdaemon && intelligent) {
usage();
}
usage();
}
usage();
}
#ifndef DEBUG
if (isdaemon) {
}
#endif
do
{
/* per-run state: start time, nice-mode counter and unsolicited total */
now = apr_time_now();
delcount = 0;
unsolicited = 0;
dowork = 0;
/* `intelligent` doubles as a small state value (0, 1 or 2); the checks
 * that drive the transitions are only partly visible here */
switch (intelligent) {
case 0:
dowork = 1;
break;
case 1:
do
{
if (status != APR_SUCCESS)
if (status == APR_SUCCESS) {
intelligent = 2;
}
dowork = 1;
break;
case 2:
do
{
if (status != APR_SUCCESS)
if (status == APR_SUCCESS) {
dowork = 1;
break;
}
intelligent = 1;
dowork = 1;
break;
}
if (dowork && !interrupted) {
} else if (!isdaemon && !interrupted) {
"An error occurred, cache cleaning aborted.\n");
return 1;
}
if (intelligent && !interrupted) {
do
{
if (status != APR_SUCCESS)
if (status == APR_SUCCESS) {
intelligent = 2;
}
else
intelligent = 1;
}
}
current = apr_time_now();
} else {
}
/* we can't sleep the whole delay time here apiece as this is racy
 * with respect to interrupt delivery - think about what happens
 * if we have tested for an interrupt, then get scheduled
 * before the apr_sleep() call and while waiting for the cpu
 * we do get an interrupt
 */
if (isdaemon) {
/* the wait is consumed in steps so `interrupted` is re-tested in between */
while (delay && !interrupted) {
if (delay > APR_USEC_PER_SEC) {
} else {
delay = 0;
}
}
}
} while (isdaemon && !interrupted);
/* a one-shot run that was interrupted reports the abort and fails */
if (!isdaemon && interrupted) {
"Cache cleaning aborted due to user request.\n");
return 1;
}
return 0;
}