logresolve.c revision 0b4b04d8621478ba59f0a6ba2950ddc02ab92b58
/* Copyright 1999-2005 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * logresolve 2.0
 *
 * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
 * UUNET Canada, April 16, 1995
 *
 * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
 * Rewritten again, and ported to APR by Colm MacCarthaigh
 *
 * Usage: logresolve [-s filename] [-c] < access_log > new_log
 *
 * Arguments:
 *    -s filename     name of a file to record statistics
 *    -c              check the DNS for a matching A record for the host.
 *
 * Notes:             (For historical interest)
 *
 * To generate meaningful statistics from an HTTPD log file, it's good
 * to have the domain name of each machine that accessed your site, but
 * doing this on the fly can slow HTTPD down.
 *
 * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
 * resolution off. Before running your stats program, just run your log
 * file through this program (logresolve) and all of your IP numbers will
 * be resolved into hostnames (where possible).
 *
 * logresolve takes an HTTPD access log (in the COMMON log file format,
 * or any other format that has the IP number/domain name as the first
 * field for that matter), and outputs the same file with all of the
 * domain names looked up. Where no domain name can be found, the IP
 * number is left in.
 *
 * To minimize impact on your nameserver, logresolve has its very own
 * internal hash-table cache. This means that each IP number will only
 * be looked up the first time it is found in the log file.
 *
 * The -c option causes logresolve to apply the same check as httpd
 * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
 * address, it looks up the IP addresses for the hostname and checks
 * that one of these matches the original address.
 */

#include "apr.h"
#include "apr_lib.h"
#include "apr_hash.h"
#include "apr_getopt.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_network_io.h"

#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if APR_HAVE_STRING_H
#include <string.h>
#endif

01f52ba6a87aa39d3873a441369828875c471823trawickstatic apr_file_t *errfile;
01f52ba6a87aa39d3873a441369828875c471823trawickstatic const char *shortname = "logresolve";
01f52ba6a87aa39d3873a441369828875c471823trawickstatic apr_hash_t *cache;
01f52ba6a87aa39d3873a441369828875c471823trawick
01f52ba6a87aa39d3873a441369828875c471823trawick/* Statistics */
20f499565e77defe9dab24dd85c02f38a1175855ndstatic int cachehits = 0;
01f52ba6a87aa39d3873a441369828875c471823trawickstatic int cachesize = 0;
01f52ba6a87aa39d3873a441369828875c471823trawickstatic int entries = 0;
01f52ba6a87aa39d3873a441369828875c471823trawickstatic int resolves = 0;
01f52ba6a87aa39d3873a441369828875c471823trawickstatic int withname = 0;
01f52ba6a87aa39d3873a441369828875c471823trawickstatic int doublefailed = 0;
01f52ba6a87aa39d3873a441369828875c471823trawickstatic int noreverse = 0;
01f52ba6a87aa39d3873a441369828875c471823trawick
01f52ba6a87aa39d3873a441369828875c471823trawick/*
01f52ba6a87aa39d3873a441369828875c471823trawick * prints various statistics to output
20f499565e77defe9dab24dd85c02f38a1175855nd */
01f52ba6a87aa39d3873a441369828875c471823trawick#define NL APR_EOL_STR
01f52ba6a87aa39d3873a441369828875c471823trawickstatic void print_statistics (apr_file_t *output)
01f52ba6a87aa39d3873a441369828875c471823trawick{
01f52ba6a87aa39d3873a441369828875c471823trawick apr_file_printf(output, "logresolve Statistics:" NL);
01f52ba6a87aa39d3873a441369828875c471823trawick apr_file_printf(output, "Entries: %d" NL, entries);
0066eddda7203f6345b56f77d146a759298dc635gryzor apr_file_printf(output, " With name : %d" NL, withname);
0066eddda7203f6345b56f77d146a759298dc635gryzor apr_file_printf(output, " Resolves : %d" NL, resolves);
0066eddda7203f6345b56f77d146a759298dc635gryzor
0066eddda7203f6345b56f77d146a759298dc635gryzor if (noreverse) {
0066eddda7203f6345b56f77d146a759298dc635gryzor apr_file_printf(output, " - No reverse : %d" NL,
f086b4b402fa9a2fefc7dda85de2a3cc1cd0a654rjung noreverse);
727872d18412fc021f03969b8641810d8896820bhumbedooh }
0d0ba3a410038e179b695446bb149cce6264e0abnd
727872d18412fc021f03969b8641810d8896820bhumbedooh if (doublefailed) {
cc7e1025de9ac63bd4db6fe7f71c158b2cf09fe4humbedooh apr_file_printf(output, " - Double lookup failed : %d" NL,
0d0ba3a410038e179b695446bb149cce6264e0abnd doublefailed);
cc7e1025de9ac63bd4db6fe7f71c158b2cf09fe4humbedooh }
727872d18412fc021f03969b8641810d8896820bhumbedooh
0d0ba3a410038e179b695446bb149cce6264e0abnd apr_file_printf(output, "Cache hits : %d" NL, cachehits);
0d0ba3a410038e179b695446bb149cce6264e0abnd apr_file_printf(output, "Cache size : %d" NL, cachesize);
0d0ba3a410038e179b695446bb149cce6264e0abnd}
ac082aefa89416cbdc9a1836eaf3bed9698201c8humbedooh
0d0ba3a410038e179b695446bb149cce6264e0abnd/*
0d0ba3a410038e179b695446bb149cce6264e0abnd * usage info
0d0ba3a410038e179b695446bb149cce6264e0abnd */
727872d18412fc021f03969b8641810d8896820bhumbedoohstatic void usage(void)
0d0ba3a410038e179b695446bb149cce6264e0abnd{
0d0ba3a410038e179b695446bb149cce6264e0abnd apr_file_printf(errfile,
30471a4650391f57975f60bbb6e4a90be7b284bfhumbedooh "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
205f749042ed530040a4f0080dbcb47ceae8a374rjung "Usage: %s [-s STATFILE] [-c]" NL
af33a4994ae2ff15bc67d19ff1a7feb906745bf8rbowen NL
0d0ba3a410038e179b695446bb149cce6264e0abnd "Options:" NL
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd " -s Record statistics to STATFILE when finished." NL
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd NL
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd " -c Perform double lookups when resolving IP addresses." NL,
0066eddda7203f6345b56f77d146a759298dc635gryzor shortname, shortname);
exit(1);
}
#undef NL
int main(int argc, const char * const argv[])
{
apr_file_t * outfile;
apr_file_t * infile;
apr_file_t * statsfile;
apr_sockaddr_t * ip;
apr_sockaddr_t * ipdouble;
apr_getopt_t * o;
apr_pool_t * pool;
apr_status_t status;
const char * arg;
char opt;
char * stats = NULL;
char * space;
char * hostname;
#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
char * inbuffer;
char * outbuffer;
#endif
char line[2048];
int doublelookups = 0;
if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
return 1;
}
atexit(apr_terminate);
if (argc) {
shortname = apr_filepath_name_get(argv[0]);
}
if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
return 1;
}
apr_file_open_stderr(&errfile, pool);
apr_getopt_init(&o, pool, argc, argv);
while (1) {
status = apr_getopt(o, "s:c", &opt, &arg);
if (status == APR_EOF) {
break;
}
else if (status != APR_SUCCESS) {
usage();
}
else {
switch (opt) {
case 'c':
if (doublelookups) {
usage();
}
doublelookups = 1;
break;
case 's':
if (stats) {
usage();
}
stats = apr_pstrdup(pool, arg);
break;
} /* switch */
} /* else */
} /* while */
apr_file_open_stdout(&outfile, pool);
apr_file_open_stdin(&infile, pool);
#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
/* Allocate two new 10k file buffers */
if ((outbuffer = apr_palloc(pool, 10240)) == NULL ||
(inbuffer = apr_palloc(pool, 10240)) == NULL) {
return 1;
}
/* Set the buffers */
apr_file_buffer_set(infile, inbuffer, 10240);
apr_file_buffer_set(outfile, outbuffer, 10240);
#endif
cache = apr_hash_make(pool);
while(apr_file_gets(line, 2048, infile) == APR_SUCCESS) {
if (line[0] == '\0') {
continue;
}
/* Count our log entries */
entries++;
/* Check if this could even be an IP address */
if (!apr_isxdigit(line[0]) && line[0] != ':') {
withname++;
apr_file_puts(line, outfile);
continue;
}
/* Terminate the line at the next space */
if((space = strchr(line, ' ')) != NULL) {
*space = '\0';
}
/* See if we have it in our cache */
hostname = (char *) apr_hash_get(cache, (const void *)line,
strlen(line));
if (hostname) {
apr_file_printf(outfile, "%s %s", hostname, space + 1);
cachehits++;
continue;
}
/* Parse the IP address */
status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC ,0, 0, pool);
if (status != APR_SUCCESS) {
/* Not an IP address */
withname++;
*space = ' ';
apr_file_puts(line, outfile);
continue;
}
/* This does not make much sense, but historically "resolves" means
* "parsed as an IP address". It does not mean we actually resolved
* the IP address into a hostname.
*/
resolves++;
/* From here on our we cache each result, even if it was not
* succesful
*/
cachesize++;
/* Try and perform a reverse lookup */
status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
if (status || hostname == NULL) {
/* Could not perform a reverse lookup */
*space = ' ';
apr_file_puts(line, outfile);
noreverse++;
/* Add to cache */
*space = '\0';
apr_hash_set(cache, (const void *) line, strlen(line),
(const void *) apr_pstrdup(pool, line));
continue;
}
/* Perform a double lookup */
if (doublelookups) {
/* Do a forward lookup on our hostname, and see if that matches our
* original IP address.
*/
status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
0, pool);
if (status == APR_SUCCESS ||
memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
/* Double-lookup failed */
*space = ' ';
apr_file_puts(line, outfile);
doublefailed++;
/* Add to cache */
*space = '\0';
apr_hash_set(cache, (const void *) line, strlen(line),
(const void *) apr_pstrdup(pool, line));
continue;
}
}
/* Outout the resolved name */
apr_file_printf(outfile, "%s %s", hostname, space + 1);
/* Store it in the cache */
apr_hash_set(cache, (const void *) line, strlen(line),
(const void *) apr_pstrdup(pool, hostname));
}
/* Flush any remaining output */
apr_file_flush(outfile);
if (stats) {
if (apr_file_open(&statsfile, stats,
APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
APR_OS_DEFAULT, pool) != APR_SUCCESS) {
apr_file_printf(errfile, "%s: Could not open %s for writing.",
shortname, stats);
return 1;
}
print_statistics(statsfile);
apr_file_close(statsfile);
}
return 0;
}