htcacheclean.c revision a51acf58d9d82f52e0ee56106cd9282030f3f3be
/* Copyright 2001-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* htcacheclean.c: simple program for cleaning of
* the disk cache of the Apache HTTP server
*
* Contributed by Andreas Steinmetz <ast domdv.de>
* 8 Oct 2004
*/
#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_thread_proc.h"
#include "apr_signal.h"
#include "apr_getopt.h"
#include "apr_ring.h"
#include "apr_date.h"
#include <unistd.h>
#endif
#include <stdlib.h>
#endif
/* mod_disk_cache.c extract start */
#define DISK_FORMAT_VERSION 0
typedef struct {
/* Indicates the format of the header struct stored on-disk. */
int format;
/* The HTTP status code returned for this response. */
int status;
/* The size of the entity name that follows. */
/* The number of times we've cached this entity. */
/* Miscellaneous time values. */
#define CACHE_HEADER_SUFFIX ".header"
#define CACHE_DATA_SUFFIX ".data"
/* mod_disk_cache.c extract end */
/* mod_disk_cache.c related definitions start */
/*
* this is based on #define AP_TEMPFILE "/aptmpXXXXXX"
*
* the above definition could be reworked into the following:
*
* #define AP_TEMPFILE_PREFIX "/"
* #define AP_TEMPFILE_BASE "aptmp"
* #define AP_TEMPFILE_SUFFIX "XXXXXX"
* #define AP_TEMPFILE_BASELEN strlen(AP_TEMPFILE_BASE)
* #define AP_TEMPFILE_NAMELEN strlen(AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX)
* #define AP_TEMPFILE AP_TEMPFILE_PREFIX AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX
*
* these definitions would then match the definitions below:
*/
#define AP_TEMPFILE_BASE "aptmp"
#define AP_TEMPFILE_SUFFIX "XXXXXX"
/* mod_disk_cache.c related definitions end */
/* define the following for debugging */
/*
* Note: on Linux delays <= 2ms are busy waits without
* scheduling, so never use a delay <= 2ms below
*/
#define SECS_PER_MIN 60
#define KBYTE 1024
#define MBYTE 1048576
typedef struct _direntry {
} DIRENTRY;
typedef struct _entry {
char *basename; /* fileset base name */
} ENTRY;
static int delcount; /* file deletion count for nice mode */
static int interrupted; /* flag: true if SIGINT or SIGTERM occurred */
static int realclean; /* flag: true means user said apache is not running */
static int verbose; /* flag: true means print statistics */
static int benice; /* flag: true means nice mode is activated */
static int dryrun; /* flag: true means dry run, don't actually delete
anything */
static int baselen; /* string length of the path to the proxy directory */
files */
/* short program name as called */
static const char *shortname = "htcacheclean";
#ifdef DEBUG
/*
* fake delete for debug purposes
*
* when DEBUG is defined, the macro below redirects every later use of
* apr_file_remove in this file to fake_file_remove
*
* NOTE(review): the signature and body of fake_file_remove are not
* visible in this excerpt
*/
#define apr_file_remove fake_file_remove
{
/* stat and printing to simulate some deletion system load and to
display what would actually have happened */
}
#endif
/*
* called on SIGINT or SIGTERM
*
* the only visible action is to set the global 'interrupted' flag
*/
{
#ifdef DEBUG
#endif
/* record the event; other code in this file tests this flag */
interrupted = 1;
}
/*
* called in out of memory condition
*
* calls exit(1) the first time it is entered; if it is entered again
* afterwards it returns APR_ENOMEM instead of calling exit() a second time
*/
{
/* static, so the flag keeps its value between invocations */
static int called = 0;
/* be careful to call exit() only once */
if (!called) {
called = 1;
exit(1);
}
/* only reached when this code is entered again after exit() was called */
return APR_ENOMEM;
}
/*
* print purge statistics
*
* does nothing unless the global 'verbose' flag is set
*
* NOTE(review): several statements of this routine (the scaling of the
* t/s/m values and the output call) are not visible in this excerpt
*/
{
/* statistics are only wanted in verbose mode */
if (!verbose) {
return;
}
ttype = 'K';
ttype = 'M';
}
stype = 'K';
stype = 'M';
}
mtype = 'K';
mtype = 'M';
}
/* scale the unsolicited byte count: first by KBYTE (unit 'K'), and once
* more by KBYTE (unit 'M') if the scaled value is still >= KBYTE
*/
if (unsolicited) {
utype = 'K';
unsolicited /= KBYTE;
if (unsolicited >= KBYTE) {
utype = 'M';
unsolicited /= KBYTE;
}
/* a non-zero amount must not end up as 0.0, so force the
* fraction to 1 when both parts came out as zero
*/
if (!unsolicited && !ufrag) {
ufrag = 1;
}
}
"%d.%d%c" APR_EOL_STR,
}
/*
* delete a single file
*
* nothing is removed in dry run mode; in nice mode the global deletion
* counter is advanced by one per removed file
*
* NOTE(review): the statement that builds 'nextpath' is not visible in
* this excerpt
*/
{
char *nextpath;
apr_pool_t *p;
/* dry run: leave the file alone */
if (dryrun) {
return;
}
/* temp pool, otherwise lots of memory could be allocated */
apr_pool_create(&p, pool);
apr_file_remove(nextpath, p);
apr_pool_destroy(p);
if (benice) {
/* count this deletion and start over once DELETE_NICE is reached
* (DELETE_NICE is defined outside this excerpt)
*/
if (++delcount >= DELETE_NICE) {
delcount = 0;
}
}
}
/*
* delete cache file set
*
* nothing is removed in dry run mode; in nice mode the global deletion
* counter is advanced by two, one for each apr_file_remove() call below
*
* NOTE(review): the statements that build 'nextpath' before each removal
* are not visible in this excerpt
*/
{
char *nextpath;
apr_pool_t *p;
/* dry run: leave the files alone */
if (dryrun) {
return;
}
/* temp pool, otherwise lots of memory could be allocated */
apr_pool_create(&p, pool);
apr_file_remove(nextpath, p);
apr_file_remove(nextpath, p);
apr_pool_destroy(p);
if (benice) {
/* two removals were issued above */
delcount += 2;
/* start counting again once DELETE_NICE is reached
* (DELETE_NICE is defined outside this excerpt)
*/
if (delcount >= DELETE_NICE) {
delcount = 0;
}
}
}
/*
* walk the cache directory tree
*
* works in a private sub-pool 'p' created from 'pool'; the hash 'h' lives
* in that sub-pool, which is destroyed shortly before the final return
*
* returns 1 on the visible early exits (including every check of the
* 'interrupted' flag) and 0 when the end of the routine is reached
*
* NOTE(review): large parts of this routine are not visible in this
* excerpt, so the comments below only describe the statements shown
*/
{
apr_pool_t *p;
apr_hash_t *h;
apr_hash_index_t *i;
apr_file_t *fd;
DIRENTRY *d, *t, *n;
ENTRY *e;
apr_pool_create(&p, pool);
h = apr_hash_make(p);
skip = 0;
return 1;
}
/* skip first two entries which will always be '.' and '..' */
if (skip < 2) {
skip++;
continue;
}
d = apr_pcalloc(p, sizeof(DIRENTRY));
}
if (interrupted) {
return 1;
}
/* 'n' is fetched at the top of the loop body and becomes the next 'd' */
for (d = APR_RING_FIRST(&anchor);
d=n) {
n = APR_RING_NEXT(d, link);
if (!base++) {
}
/* there may be temporary files which may be gone before
* processing, always skip these if not in realclean mode
*/
continue;
}
}
/* this may look strange but apr_stat() may return errno which
* is system dependent and there may be transient failures,
* so just blindly retry for a short while
*/
do {
if (status != APR_SUCCESS) {
}
/* what may happen here is that apache did create a file which
* we did detect but then does delete the file before we can
* get file information, so if we don't get any file information
* we will ignore the file in this case
*/
if (status != APR_SUCCESS) {
if (!realclean && !interrupted) {
continue;
}
return 1;
}
return 1;
}
continue;
}
continue;
}
if (!ext) {
}
continue;
}
/* terminate the name at 'ext' (presumably the start of the suffix) */
*ext = '\0';
/* if a user manually creates a '.header' file */
if (d->basename[0] == '\0') {
continue;
}
if (t) {
d = t;
}
continue;
}
/* terminate the name at 'ext' (presumably the start of the suffix) */
*ext = '\0';
/* if a user manually creates a '.data' file */
if (d->basename[0] == '\0') {
continue;
}
if (t) {
d = t;
}
}
}
if (interrupted) {
return 1;
}
void *hvalue;
d = hvalue;
/* handle the entry according to which files of the set were found */
switch(d->type) {
case HEADERDATA:
p) == APR_SUCCESS) {
len = sizeof(disk_cache_info_t);
&len) == APR_SUCCESS) {
break;
}
}
else {
}
}
/* we have a somehow unreadable headers file which is associated
* with a data file. this may be caused by apache currently
* rewriting the headers file. thus we may delete the file set
* either in realclean mode or if the headers file modification
* timestamp is not within a specified positive or negative offset
* to the current time.
*/
current = apr_time_now();
/* both sizes of the set are added to the unsolicited total */
unsolicited += d->hsize;
unsolicited += d->dsize;
}
break;
/* single data and header files may be deleted either in realclean
* mode or if their modification timestamp is not within a
* specified positive or negative offset to the current time.
* this handling is necessary due to possible race conditions
* between apache and this process
*/
case HEADER:
current = apr_time_now();
unsolicited += d->hsize;
}
break;
case DATA:
current = apr_time_now();
unsolicited += d->dsize;
}
break;
/* temp files may only be deleted in realclean mode which
* is asserted above if a tempfile is in the hash array
*/
case TEMP:
unsolicited += d->dsize;
break;
}
}
if (interrupted) {
return 1;
}
/* the directory data and the hash are no longer needed */
apr_pool_destroy(p);
if (benice) {
}
if (interrupted) {
return 1;
}
return 0;
}
/*
* purge cache entries
*
* works on the entry ring 'root' in three passes, as described by the
* comments below: entries with a timestamp in the future, expired entries,
* and finally the remaining entries from oldest to newest; the routine
* returns early whenever the 'interrupted' flag is found set
*
* NOTE(review): the loop conditions and the deletion calls are not
* visible in this excerpt
*/
{
sum = 0;
entries = 0;
/* count the entries currently in the ring */
for (e = APR_RING_FIRST(&root);
e = APR_RING_NEXT(e, link)) {
entries++;
}
return;
}
/* process all entries with a timestamp in the future, this may
* happen if a wrong system time is corrected
*/
for (e = APR_RING_FIRST(&root);
/* fetch the successor first, 'e' is unlinked from the ring below */
n = APR_RING_NEXT(e, link);
entries--;
APR_RING_REMOVE(e, link);
if (!interrupted) {
}
return;
}
}
e = n;
}
if (interrupted) {
return;
}
/* process all entries which are expired */
for (e = APR_RING_FIRST(&root);
/* fetch the successor first, 'e' is unlinked from the ring below */
n = APR_RING_NEXT(e, link);
entries--;
APR_RING_REMOVE(e, link);
if (!interrupted) {
}
return;
}
}
e = n;
}
if (interrupted) {
return;
}
/* process remaining entries oldest to newest, the check for an empty
* ring actually isn't necessary except when the compiler does
* corrupt 64bit arithmetics which happened to me once, so better safe
* than sorry
*/
e = APR_RING_NEXT(e, link)) {
oldest = e;
}
}
entries--;
}
if (!interrupted) {
}
}
/*
* usage info
*
* holds the option summary for the program and then terminates the
* process with exit(1), so this function never returns to its caller
*
* NL is shorthand for APR_EOL_STR and ends every line of the text
*
* NOTE(review): the call that emits the text and its arguments are not
* visible in this excerpt
*/
#define NL APR_EOL_STR
static void usage(void)
{
"%s -- program for cleaning the disk cache." NL
"Usage: %s [-Dvrn] -pPATH -lLIMIT" NL
" %s [-Dvrn] -pPATH -LLIMIT" NL
" %s [-ni] -dINTERVAL -pPATH -lLIMIT" NL
" %s [-ni] -dINTERVAL -pPATH -LLIMIT" NL
"Options:" NL
" -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL
" This option is mutually exclusive with the -D, -v and -r" NL
" options." NL
" -D Do a dry run and don't delete anything. This option is mutually" NL
" exclusive with the -d option." NL
" -v Be verbose and print statistics. This option is mutually" NL
" exclusive with the -d option." NL
" -r Clean thoroughly. This assumes that the Apache web server is " NL
" not running. This option is mutually exclusive with the -d" NL
" option." NL
" -n Be nice. This causes slower processing in favour of other" NL
" processes." NL
" -p Specify PATH as the root directory of the disk cache." NL
" -l Specify LIMIT as the total disk cache size limit in KBytes." NL
" -L Specify LIMIT as the total disk cache size limit in MBytes." NL
" -i Be intelligent and run only when there was a modification of" NL
" the disk cache. This option is only possible together with the" NL
" -d option." NL,
);
/* a usage error always ends the program */
exit(1);
}
/*
* main
*
* resets the global and local option flags, parses the command line
* options and then runs the cleaning pass inside a do/while loop that
* repeats as long as 'isdaemon' is set and 'interrupted' is not
*
* every option may be given only once; a repeated option ends in usage()
*
* returns 1 on the visible failure and interrupt paths, 0 otherwise
*
* NOTE(review): the signature, several declarations and many statements
* are not visible in this excerpt, so the comments below only describe
* the statements shown
*/
{
apr_getopt_t *o;
char opt;
const char *arg;
/* start from a known state for all flags */
interrupted = 0;
repeat = 0;
isdaemon = 0;
dryrun = 0;
limit_found = 0;
max = 0;
verbose = 0;
realclean = 0;
benice = 0;
intelligent = 0;
previous = 0; /* avoid compiler warning */
return 1;
}
if (argc) {
}
return 1;
}
/* option parsing loop */
while (1) {
break;
}
else if (status != APR_SUCCESS) {
usage();
}
else {
switch (opt) {
case 'i':
if (intelligent) {
usage();
}
intelligent = 1;
break;
case 'D':
if (dryrun) {
usage();
}
dryrun = 1;
break;
case 'n':
if (benice) {
usage();
}
benice = 1;
break;
case 'v':
if (verbose) {
usage();
}
verbose = 1;
break;
case 'r':
if (realclean) {
usage();
}
realclean = 1;
break;
case 'd':
if (isdaemon) {
usage();
}
isdaemon = 1;
/* scale the repeat value by SECS_PER_MIN */
repeat *= SECS_PER_MIN;
break;
/* 'l' and 'L' share the limit_found flag, so only one of the two
* may be given
*/
case 'l':
if (limit_found) {
usage();
}
limit_found = 1;
break;
case 'L':
if (limit_found) {
usage();
}
limit_found = 1;
break;
case 'p':
if (proxypath) {
usage();
}
usage();
}
break;
} /* switch */
} /* else */
} /* while */
usage();
}
usage();
}
/* -i is only accepted together with -d */
if (!isdaemon && intelligent) {
usage();
}
usage();
}
usage();
}
#ifndef DEBUG
if (isdaemon) {
}
#endif
do {
/* per pass state is reset at the start of every iteration */
now = apr_time_now();
delcount = 0;
unsolicited = 0;
dowork = 0;
/* decide whether this pass has to do any work */
switch (intelligent) {
case 0:
/* intelligent mode is off: always work */
dowork = 1;
break;
case 1:
do {
if (status != APR_SUCCESS) {
}
if (status == APR_SUCCESS) {
intelligent = 2;
}
dowork = 1;
break;
case 2:
do {
if (status != APR_SUCCESS) {
}
if (status == APR_SUCCESS) {
dowork = 1;
}
break;
}
intelligent = 1;
dowork = 1;
break;
}
if (dowork && !interrupted) {
}
else if (!isdaemon && !interrupted) {
"aborted." APR_EOL_STR);
return 1;
}
if (intelligent && !interrupted) {
do {
if (status != APR_SUCCESS) {
}
if (status == APR_SUCCESS) {
intelligent = 2;
}
else {
intelligent = 1;
}
}
}
current = apr_time_now();
}
}
else {
}
/* we can't sleep the whole delay time here in one piece as this is
* racy with respect to interrupt delivery - think about what happens
* if we have tested for an interrupt, then get scheduled
* before the apr_sleep() call and while waiting for the cpu
* we do get an interrupt
*/
if (isdaemon) {
/* the 'interrupted' flag is tested again on every iteration */
while (delay && !interrupted) {
if (delay > APR_USEC_PER_SEC) {
}
else {
delay = 0;
}
}
}
} while (isdaemon && !interrupted);
/* an interrupt in non-daemon mode is reported as a failure */
if (!isdaemon && interrupted) {
"request." APR_EOL_STR);
return 1;
}
return 0;
}