/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* htcacheclean.c: simple program for cleaning of
* the disk cache of the Apache HTTP server
*
* Contributed by Andreas Steinmetz <ast domdv.de>
* 8 Oct 2004
*/
#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_thread_proc.h"
#include "apr_signal.h"
#include "apr_getopt.h"
#include "apr_md5.h"
#include "apr_ring.h"
#include "apr_date.h"
#include "apr_buckets.h"
#include "../modules/cache/cache_common.h"
#include "../modules/cache/cache_disk_common.h"
#include <unistd.h>
#endif
#include <stdlib.h>
#endif
/* define the following for debugging */
/*
* Note: on Linux delays <= 2ms are busy waits without
* scheduling, so never use a delay <= 2ms below
*/
typedef struct _direntry {
} DIRENTRY;
typedef struct _entry {
} ENTRY;
anything */
files */
/* short program name as called */
/* what did we clean? */
struct stats {
};
#ifdef DEBUG
/*
* fake delete for debug purposes
*
* Only compiled when DEBUG is defined.
*/
{
/* stat and printing to simulate some deletion system load and to
display what would actually have happened */
}
#endif
/*
* called on SIGINT or SIGTERM
*
* Records the request by setting the `interrupted` flag. The processing
* loops elsewhere in this file test that flag and return early when it
* is set.
*/
{
#ifdef DEBUG
#endif
interrupted = 1;
}
/*
* called in out of memory condition
*
* The first invocation terminates the process with exit status 1.
* Any invocation that finds `called` already set skips exit() and
* returns APR_ENOMEM instead.
*/
{
/* persists across calls; set once exit() has been requested */
static int called = 0;
/* be careful to call exit() only once */
if (!called) {
called = 1;
exit(1);
}
return APR_ENOMEM;
}
/*
* print purge statistics
*
* Produces no output unless `verbose` is set. Reads the inode counters
* `s->inodes` and `s->nodes` from the stats record passed as `s`.
*/
{
if (!verbose) {
return;
}
ttype = 'K';
ttype = 'M';
}
stype = 'K';
stype = 'M';
}
mtype = 'K';
mtype = 'M';
}
/* scale the unsolicited byte count: one division by KBYTE selects the
* 'K' unit, a second one (when the value is still >= KBYTE) selects 'M'
*/
if (unsolicited) {
utype = 'K';
unsolicited /= KBYTE;
if (unsolicited >= KBYTE) {
utype = 'M';
unsolicited /= KBYTE;
}
/* a non-zero amount that scaled down to zero with a zero fraction:
* force the fraction to 1, presumably so that it is not reported as
* zero -- TODO confirm against the format string
*/
if (!unsolicited && !ufrag) {
ufrag = 1;
}
}
s->inodes);
", total %sinodes now "
: "", s->nodes);
}
/**
* Round the value up to the given threshold.
*
* The rounding branch is only entered when `round` is greater than 1;
* for any smaller `round` the value is returned as passed in.
*/
if (round > 1) {
}
return val;
}
/*
* delete parent directories
*
* Works on a private sub-pool of `pool` that is destroyed before the
* function returns. Each successful removal decrements the counter
* that `nodes` points to.
*/
{
apr_pool_t *p;
/* temp pool, otherwise lots of memory could be allocated */
apr_pool_create(&p, pool);
/* If asked to delete dirs, do so now. We don't care if it fails.
* If it fails, it likely means there was something else there.
*/
const char *vary;
while (end) {
/* cut the path off at `end` (presumably a separator inside nextpath,
* so that nextpath then names the parent) -- TODO confirm
*/
*end = 0;
/* remove the directory */
if (!apr_dir_remove(nextpath, p)) {
(*nodes)--;
/* vary directory found? */
if (!apr_file_remove(nextpath, p)) {
(*nodes)--;
}
}
}
else {
/* the directory could not be removed: stop climbing */
break;
}
}
}
apr_pool_destroy(p);
/* nice mode: count this operation; the counter restarts once it
* reaches DELETE_NICE
*/
if (benice) {
if (++delcount >= DELETE_NICE) {
delcount = 0;
}
}
}
/*
* delete a single file
*
* Works on a private sub-pool of `pool` that is destroyed before the
* function returns. The counter that `nodes` points to is decremented
* for the file handled.
*/
{
char *nextpath;
apr_pool_t *p;
/* temp pool, otherwise lots of memory could be allocated */
apr_pool_create(&p, pool);
/* dry run: no removal call is made, only the node count is adjusted */
if (dryrun) {
(*nodes)--;
}
}
/* real run: the node count only drops when the removal succeeded */
else if (!apr_file_remove(nextpath, p)) {
(*nodes)--;
}
apr_pool_destroy(p);
/* nice mode: count this deletion; the counter restarts once it
* reaches DELETE_NICE
*/
if (benice) {
if (++delcount >= DELETE_NICE) {
delcount = 0;
}
}
}
/*
* delete cache file set
*
* Performs two removal steps in sequence, each following the same
* pattern: in dry-run mode only the node count is adjusted, otherwise
* the count drops when apr_file_remove() succeeds. Works on a private
* sub-pool of `pool` that is destroyed before the function returns.
*/
{
char *nextpath;
apr_pool_t *p;
/* temp pool, otherwise lots of memory could be allocated */
apr_pool_create(&p, pool);
/* first file of the set */
if (dryrun) {
(*nodes)--;
}
}
else if (!apr_file_remove(nextpath, p)) {
(*nodes)--;
}
/* second file of the set */
if (dryrun) {
(*nodes)--;
}
}
else if (!apr_file_remove(nextpath, p)) {
(*nodes)--;
}
apr_pool_destroy(p);
/* nice mode: two removal steps were performed, so the counter
* advances by 2 before being compared with DELETE_NICE
*/
if (benice) {
delcount += 2;
if (delcount >= DELETE_NICE) {
delcount = 0;
}
}
}
/*
* list the cache directory tree
*
* Returns 0 when the listing completed, and 1 on the failure paths
* below or when the `interrupted` flag was found set. Allocations go
* to a private sub-pool of `pool`.
*/
{
apr_pool_t *p;
char *url;
apr_pool_create(&p, pool);
return 1;
}
continue;
}
return 1;
}
}
== APR_SUCCESS) {
/* only entries carrying the disk cache header format are listed */
if (format == DISK_FORMAT_VERSION) {
len = sizeof(disk_cache_info_t);
== APR_SUCCESS) {
== APR_SUCCESS) {
/* extended listing: one line of attributes per URL, which needs
* file information for both the header and the data file
*/
if (listextended) {
/* stat the header file */
if (APR_SUCCESS != apr_file_info_get(
/* ignore the file */
}
!= apr_stat(
&dinfo,
p,
path,
"/",
p,
NULL),
p)) {
/* ignore the file */
}
/* ignore the file */
}
else {
"%s %" APR_SIZE_T_FMT
" %" APR_SIZE_T_FMT
" %d %" APR_SIZE_T_FMT
" %" APR_TIME_T_FMT
" %" APR_TIME_T_FMT
" %" APR_TIME_T_FMT
" %" APR_TIME_T_FMT
" %d %d\n",
url,
: 0, round),
}
}
else {
/* plain listing: the data file is still checked before the URL
* is printed
*/
/* stat the data file */
!= apr_stat(
&dinfo,
p,
path,
"/",
p,
NULL),
p)) {
/* ignore the file */
}
/* ignore the file */
}
else {
url);
}
}
}
break;
}
}
}
}
}
}
}
/* NOTE(review): this return happens before apr_pool_destroy(p) below */
if (interrupted) {
return 1;
}
apr_pool_destroy(p);
if (benice) {
}
if (interrupted) {
return 1;
}
return 0;
}
/*
* walk the cache directory tree
*
* Returns 0 when the walk completed, and 1 on the failure paths below
* or when the `interrupted` flag was found set. Directory entries are
* collected as DIRENTRY records, grouped through the hash `h`, and then
* handled according to their type (HEADERDATA, HEADER, DATA or TEMP).
* All allocations go to a private sub-pool of `pool`.
*/
{
apr_pool_t *p;
apr_hash_t *h;
apr_hash_index_t *i;
DIRENTRY *d, *t, *n;
ENTRY *e;
apr_pool_create(&p, pool);
h = apr_hash_make(p);
return 1;
}
continue;
}
/* one zero-initialised DIRENTRY per directory entry; every entry
* also counts as a node
*/
d = apr_pcalloc(p, sizeof(DIRENTRY));
(*nodes)++;
}
if (interrupted) {
return 1;
}
for (d = APR_RING_FIRST(&anchor);
d=n) {
/* remember the successor up front; the loop advances with d=n */
n = APR_RING_NEXT(d, link);
if (!base++) {
}
/* there may be temporary files which may be gone before
* processing, always skip these if not in realclean mode
*/
continue;
}
}
/* this may look strange but apr_stat() may return an error which
* is system dependent and there may be transient failures,
* so just blindly retry for a short while
*/
do {
if (status != APR_SUCCESS) {
}
/* what may happen here is that apache did create a file which
* we did detect but then does delete the file before we can
* get file information, so if we don't get any file information
* we will ignore the file in this case
*/
if (status != APR_SUCCESS) {
if (!realclean && !interrupted) {
continue;
}
return 1;
}
return 1;
}
continue;
}
continue;
}
if (!ext) {
}
continue;
}
/* strip the extension so that only the base name remains */
*ext = '\0';
/* if a user manually creates a '.header' file */
if (d->basename[0] == '\0') {
continue;
}
if (t) {
d = t;
}
continue;
}
/* strip the extension so that only the base name remains */
*ext = '\0';
/* if a user manually creates a '.data' file */
if (d->basename[0] == '\0') {
continue;
}
if (t) {
d = t;
}
}
}
if (interrupted) {
return 1;
}
void *hvalue;
d = hvalue;
/* handle each collected entry according to which files were found */
switch(d->type) {
case HEADERDATA:
APR_OS_DEFAULT, p) == APR_SUCCESS) {
&len) == APR_SUCCESS) {
if (format == DISK_FORMAT_VERSION) {
len = sizeof(disk_cache_info_t);
&len) == APR_SUCCESS) {
nodes, p);
}
break;
}
else {
}
}
else if (format == VARY_FORMAT_VERSION) {
/* This must be a URL that added Vary headers later,
* so kill the orphaned .data file
*/
}
else {
nodes, p);
}
break;
}
else {
/* We didn't recognise the format, kill the files */
break;
}
}
else {
}
}
/* we have a somehow unreadable headers file which is associated
* with a data file. this may be caused by apache currently
* rewriting the headers file. thus we may delete the file set
* either in realclean mode or if the headers file modification
* timestamp is not within a specified positive or negative offset
* to the current time.
*/
current = apr_time_now();
/* both file sizes are added to the `unsolicited` total */
unsolicited += d->hsize;
unsolicited += d->dsize;
}
break;
/* single data and header files may be deleted either in realclean
* mode or if their modification timestamp is not within a
* specified positive or negative offset to the current time.
* this handling is necessary due to possible race conditions
* between apache and this process
*/
case HEADER:
current = apr_time_now();
APR_OS_DEFAULT, p) == APR_SUCCESS) {
&len) == APR_SUCCESS) {
if (format == VARY_FORMAT_VERSION) {
&len) == APR_SUCCESS) {
}
}
break;
}
}
else if (format == DISK_FORMAT_VERSION) {
len = sizeof(disk_cache_info_t);
&len) == APR_SUCCESS) {
break;
}
else {
}
}
else {
break;
}
}
else {
}
}
unsolicited += d->hsize;
}
break;
case DATA:
current = apr_time_now();
unsolicited += d->dsize;
}
break;
/* temp files may only be deleted in realclean mode which
* is asserted above if a tempfile is in the hash array
*/
case TEMP:
unsolicited += d->dsize;
break;
}
}
/* NOTE(review): this return happens before apr_pool_destroy(p) below */
if (interrupted) {
return 1;
}
apr_pool_destroy(p);
if (benice) {
}
if (interrupted) {
return 1;
}
return 0;
}
/*
* purge cache entries
*
* Walks the entry ring `root` in up to three passes: entries with a
* timestamp in the future, then expired entries, then the remaining
* entries from oldest to newest. The counters in `s` record how many
* entries remain (`entries`) and how many were dropped in each pass
* (`dfuture`, `dexpired`, `dfresh`). Statistics are printed through
* printstats() unless the run was interrupted.
*/
{
struct stats s;
s.sum = 0;
s.entries = 0;
s.dfuture = 0;
s.dexpired = 0;
s.dfresh = 0;
/* count the entries currently held in the ring */
for (e = APR_RING_FIRST(&root);
e = APR_RING_NEXT(e, link)) {
s.entries++;
}
printstats(path, &s);
return;
}
/* process all entries with a timestamp in the future, this may
* happen if a wrong system time is corrected
*/
for (e = APR_RING_FIRST(&root);
/* fetch the successor first: e is unlinked from the ring below */
n = APR_RING_NEXT(e, link);
s.entries--;
s.dfuture++;
APR_RING_REMOVE(e, link);
if (!interrupted) {
printstats(path, &s);
}
return;
}
}
e = n;
}
if (interrupted) {
return;
}
/* process all entries which are expired */
for (e = APR_RING_FIRST(&root);
/* fetch the successor first: e is unlinked from the ring below */
n = APR_RING_NEXT(e, link);
s.entries--;
s.dexpired++;
APR_RING_REMOVE(e, link);
if (!interrupted) {
printstats(path, &s);
}
return;
}
}
e = n;
}
if (interrupted) {
return;
}
/* process remaining entries oldest to newest, the check for an empty
* ring actually isn't necessary except when the compiler does
* corrupt 64-bit arithmetic which happened to me once, so better safe
* than sorry
*/
e = APR_RING_NEXT(e, link)) {
oldest = e;
}
}
s.entries--;
s.dfresh++;
}
if (!interrupted) {
printstats(path, &s);
}
}
{
/* Returns an APR status in `rv`. A missing target (ENOENT) and any
* other initial failure are handed straight back to the caller. At the
* end APR_ENOTEMPTY is converted to APR_SUCCESS, in line with the
* "tolerate the directory not being empty" note below.
*/
if (APR_STATUS_IS_ENOENT(rv)) {
return rv;
}
if (rv != APR_SUCCESS) {
return rv;
}
== APR_SUCCESS) {
/* tolerate the directory not being empty, the cache may have
* attempted to recreate the directory in the mean time.
*/
break;
}
}
} else {
if (APR_SUCCESS != rv) {
&rv);
break;
}
}
}
if (rv == APR_SUCCESS) {
/* a non-empty directory is not treated as an error */
if (APR_ENOTEMPTY == rv) {
rv = APR_SUCCESS;
}
if (rv != APR_SUCCESS) {
}
}
return rv;
}
const char *rest)
{
/* Returns APR_SUCCESS when `found` was set or when matching files were
* seen (`files`). Otherwise returns the last status in `rv`, which is
* preset to APR_ENOENT before the search starts. An initial failure is
* returned to the caller unchanged.
*/
if (rv != APR_SUCCESS) {
return rv;
}
/* default result: nothing found */
rv = APR_ENOENT;
== APR_SUCCESS) {
: restlen));
if (APR_SUCCESS == rv) {
found = 1;
}
}
files = 1;
}
}
files = 1;
}
}
}
if (files) {
rv = APR_SUCCESS;
/* removal is skipped entirely in dry-run mode */
if (!dryrun) {
const char *remove;
}
}
}
}
}
/* If asked to delete dirs, do so now. We don't care if it fails.
* If it fails, it likely means there was something else there.
*/
}
if (found) {
return APR_SUCCESS;
}
return rv;
}
/**
* Delete a specific URL from the cache.
*
* The 16-byte `digest` is turned into a 22-character name using the
* 64-character alphabet below.
*/
{
int i, k;
unsigned int x;
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
/* encode 128 bits as 22 characters, using a modified uuencoding
* the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
* 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
*/
/* five groups of three bytes: digest[0] .. digest[14] */
for (i = 0, k = 0; i < 15; i += 3) {
}
/* one byte left */
x = digest[15];
/* NUL-terminate the encoded name */
tmp[k] = 0;
/* automatically find the directory levels */
}
/*
* usage info
*
* Prints the help text and then terminates the process with exit
* status 1; this function does not return. `error`, when non-NULL, is
* handled first (callers pass a short message describing what was wrong
* with the command line).
*
* NOTE(review): the -d description below lists -D, -v and -r as
* mutually exclusive with -d, while the error message raised from the
* option checks reads "Option -d cannot be used with -v, -r, -L or -D".
* Confirm which of the two is accurate. The synopsis lines also do not
* mention -a, -A or -R, although the option list documents them.
*/
{
if (error) {
}
"%s -- program for cleaning the disk cache." NL
"Usage: %s [-Dvtrn] -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL
" %s [-nti] -dINTERVAL -pPATH [-lLIMIT|-LLIMIT] [-PPIDFILE]" NL
" %s [-Dvt] -pPATH URL ..." NL
"Options:" NL
" -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL
" This option is mutually exclusive with the -D, -v and -r" NL
" options." NL
" -D Do a dry run and don't delete anything. This option is mutually" NL
" exclusive with the -d option. When doing a dry run and deleting" NL
" directories with -t, the inodes reported deleted in the stats" NL
" cannot take into account the directories deleted, and will be" NL
" marked as an estimate." NL
" -v Be verbose and print statistics. This option is mutually" NL
" exclusive with the -d option." NL
" -r Clean thoroughly. This assumes that the Apache web server is " NL
" not running. This option is mutually exclusive with the -d" NL
" option and implies -t." NL
" -n Be nice. This causes slower processing in favour of other" NL
" processes." NL
" -t Delete all empty directories. By default only cache files are" NL
" removed, however with some configurations the large number of" NL
" directories created may require attention." NL
" -p Specify PATH as the root directory of the disk cache." NL
" -P Specify PIDFILE as the file to write the pid to." NL
" -R Specify amount to round sizes up to." NL
" -l Specify LIMIT as the total disk cache size limit. Attach 'K'" NL
" or 'M' to the number for specifying KBytes or MBytes." NL
" -L Specify LIMIT as the total disk cache inode limit." NL
" -i Be intelligent and run only when there was a modification of" NL
" the disk cache. This option is only possible together with the" NL
" -d option." NL
" -a List the URLs currently stored in the cache. Variants of the" NL
" same URL will be listed once for each variant." NL
" -A List the URLs currently stored in the cache, along with their" NL
" attributes in the following order: url, header size, body size," NL
" status, entity version, date, expiry, request time," NL
" response time, body present, head request." NL
"Should an URL be provided on the command line, the URL will be" NL
"deleted from the cache. A reverse proxied URL is made up as follows:" NL
"http://<hostname>:<port><path>?[query]. So, for the path \"/\" on the" NL
"host \"localhost\" and port 80, the URL to delete becomes" NL
"\"http://localhost:80/?\". Note the '?' in the URL must always be" NL
"specified explicitly, whether a query string is present or not." NL,
);
/* always a failure exit, even when only help was requested */
exit(1);
}
"The option '%c' cannot be specified more than once",
option));
}
{
}
else {
/* failure path: report the problem with the pid file named by
* `pidfilename` (only when an error stream is available) and
* terminate with exit status 1
*/
if (errfile) {
"Could not write the pid file '%s': %pm" APR_EOL_STR,
pidfilename, &status);
}
exit(1);
}
}
/*
* main
*
* Resets the run-time flags, parses the command line options, validates
* the option combination, and then either deletes the URLs given as
* arguments, lists the cache, or runs the clean-up loop (repeatedly
* when daemonized). Returns 0 on normal completion and 1 on the
* failure and interrupt paths below.
*/
{
apr_getopt_t *o;
char opt;
const char *arg;
interrupted = 0;
repeat = 0;
isdaemon = 0;
dryrun = 0;
limit_found = 0;
inodes_found = 0;
max = 0;
inodes = 0;
round = 0;
verbose = 0;
realclean = 0;
benice = 0;
deldirs = 0;
intelligent = 0;
previous = 0; /* avoid compiler warning */
pidfilename = NULL;
return 1;
}
if (argc) {
}
return 1;
}
/* option parsing: every option may be given at most once, the guard
* at the top of each case detects a repeated option
*/
while (1) {
break;
}
else if (status != APR_SUCCESS) {
}
else {
char *end;
switch (opt) {
case 'i':
if (intelligent) {
}
intelligent = 1;
break;
case 'D':
if (dryrun) {
}
dryrun = 1;
break;
case 'n':
if (benice) {
}
benice = 1;
break;
case 't':
if (deldirs) {
}
deldirs = 1;
break;
case 'v':
if (verbose) {
}
verbose = 1;
break;
case 'r':
if (realclean) {
}
/* -r implies -t: thorough cleaning also removes directories */
realclean = 1;
deldirs = 1;
break;
case 'd':
if (isdaemon) {
}
isdaemon = 1;
/* the interval is given in minutes, keep it in seconds */
repeat *= SECS_PER_MIN;
break;
case 'l':
if (limit_found) {
}
limit_found = 1;
do {
if (rv == APR_SUCCESS) {
}
}
}
else if (*end && /* neither empty nor [Bb] */
rv = APR_EGENERAL;
}
}
if (rv != APR_SUCCESS) {
}
} while(0);
break;
case 'L':
if (inodes_found) {
}
inodes_found = 1;
do {
if (rv == APR_SUCCESS) {
}
}
}
else if (*end && /* neither empty nor [Bb] */
rv = APR_EGENERAL;
}
}
if (rv != APR_SUCCESS) {
}
} while(0);
break;
case 'a':
if (listurls) {
}
listurls = 1;
break;
case 'A':
/* -A shares the `listurls` flag with -a, so the two options
* also exclude each other; it additionally selects the
* extended output
*/
if (listurls) {
}
listurls = 1;
listextended = 1;
break;
case 'p':
if (proxypath) {
}
}
break;
case 'P':
if (pidfilename) {
}
break;
case 'R':
if (round) {
}
if (rv == APR_SUCCESS) {
if (*end) {
}
else if (round < 0) {
}
}
if (rv != APR_SUCCESS) {
}
break;
} /* switch */
} /* else */
} /* while */
if (argc <= 1) {
}
/* the cache root is mandatory in every mode */
if (!proxypath) {
usage("Option -p must be specified");
}
/* URL deletion mode: -d, -i and -l make no sense here */
int deleted = 0;
int error = 0;
if (isdaemon) {
usage("Option -d cannot be used with URL arguments, aborting");
}
if (intelligent) {
usage("Option -i cannot be used with URL arguments, aborting");
}
if (limit_found) {
usage("Option -l cannot be used with URL arguments, aborting");
}
/* per-URL outcome: deleted, not found (APR_ENOENT), or error */
if (APR_SUCCESS == status) {
if (verbose) {
}
deleted = 1;
}
else if (APR_ENOENT == status) {
if (verbose) {
}
}
else {
if (verbose) {
}
error = 1;
}
o->ind++;
}
}
usage("Option -d must be greater than zero");
}
usage("Option -d cannot be used with -v, -r, -L or -D");
}
if (!isdaemon && intelligent) {
usage("Option -i cannot be used without -d");
}
usage("At least one of option -l or -L must be greater than zero");
}
}
if (pidfilename) {
* can report errors
*/
}
/* listing mode: exit status is 1 only when the listing was interrupted */
if (listurls) {
return (interrupted != 0);
}
#ifndef DEBUG
if (isdaemon) {
if (pidfilename) {
}
if (pidfilename) {
}
}
#endif
/* one iteration per cleaning pass; repeats only when daemonized */
do {
now = apr_time_now();
delcount = 0;
unsolicited = 0;
dowork = 0;
/* `intelligent` doubles as a small state value: 0 means the -i
* option is off and every pass does work; 1 and 2 are the two
* states used while -i is active
*/
switch (intelligent) {
case 0:
dowork = 1;
break;
case 1:
do {
if (status != APR_SUCCESS) {
}
if (status == APR_SUCCESS) {
intelligent = 2;
}
dowork = 1;
break;
case 2:
do {
if (status != APR_SUCCESS) {
}
if (status == APR_SUCCESS) {
dowork = 1;
}
break;
}
intelligent = 1;
dowork = 1;
break;
}
if (dowork && !interrupted) {
}
else if (!isdaemon && !interrupted) {
"aborted." APR_EOL_STR);
return 1;
}
if (intelligent && !interrupted) {
do {
if (status != APR_SUCCESS) {
}
if (status == APR_SUCCESS) {
intelligent = 2;
}
else {
intelligent = 1;
}
}
}
current = apr_time_now();
}
}
else {
}
/* we can't sleep the whole delay time here apiece as this is racy
* with respect to interrupt delivery - think about what happens
* if we have tested for an interrupt, then get scheduled
* before the apr_sleep() call and while waiting for the cpu
* we do get an interrupt
*/
if (isdaemon) {
/* the wait is consumed in slices of at most APR_USEC_PER_SEC,
* re-checking `interrupted` between slices
*/
while (delay && !interrupted) {
if (delay > APR_USEC_PER_SEC) {
}
else {
delay = 0;
}
}
}
} while (isdaemon && !interrupted);
/* a one-shot run that was interrupted is reported as a failure */
if (!isdaemon && interrupted) {
"request." APR_EOL_STR);
return 1;
}
return 0;
}