logresolve.c revision 6e252c97a556417cde1cbc948eaf62c39d404a7c
e1e8390280254f7f0580d701e583f670643d4f3fnilgun/* Licensed to the Apache Software Foundation (ASF) under one or more
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * contributor license agreements. See the NOTICE file distributed with
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * this work for additional information regarding copyright ownership.
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * The ASF licenses this file to You under the Apache License, Version 2.0
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * (the "License"); you may not use this file except in compliance with
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * the License. You may obtain a copy of the License at
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * http://www.apache.org/licenses/LICENSE-2.0
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Unless required by applicable law or agreed to in writing, software
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * distributed under the License is distributed on an "AS IS" BASIS,
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * See the License for the specific language governing permissions and
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * limitations under the License.
e1e8390280254f7f0580d701e583f670643d4f3fnilgun */
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun/*
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * logresolve 2.0
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * UUNET Canada, April 16, 1995
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Rewritten again, and ported to APR by Colm MacCarthaigh
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Usage: logresolve [-s filename] [-c] < access_log > new_log
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Arguments:
45a544a8bb3fa1f95e5edac9fb3e723e2bb7001drbowen * -s filename name of a file to record statistics
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * -c check the DNS for a matching A record for the host.
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Notes: (For historical interest)
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * To generate meaningful statistics from an HTTPD log file, it's good
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * to have the domain name of each machine that accessed your site, but
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * doing this on the fly can slow HTTPD down.
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * resolution off. Before running your stats program, just run your log
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * file through this program (logresolve) and all of your IP numbers will
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * be resolved into hostnames (where possible).
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * logresolve takes an HTTPD access log (in the COMMON log file format,
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * or any other format that has the IP number/domain name as the first
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * field for that matter), and outputs the same file with all of the
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * domain names looked up. Where no domain name can be found, the IP
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * number is left in.
e1e8390280254f7f0580d701e583f670643d4f3fnilgun *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * To minimize impact on your nameserver, logresolve has its very own
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * internal hash-table cache. This means that each IP number will only
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * be looked up the first time it is found in the log file.
2704de98885368683621b01c8f8f4e4b01557611takashi *
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * The -c option causes logresolve to apply the same check as httpd
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
b9f522ae1c0ed2bf3fc4444245bf28b2e2449a65nd * address, it looks up the IP addresses for the hostname and checks
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * that one of these matches the original address.
c38e2a97e43fc69b22f6b03c6d2f60e3bd705f89sf */
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include "apr.h"
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include "apr_lib.h"
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include "apr_hash.h"
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include "apr_getopt.h"
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include "apr_strings.h"
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include "apr_file_io.h"
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include "apr_network_io.h"
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#if APR_HAVE_STDLIB_H
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#include <stdlib.h>
2704de98885368683621b01c8f8f4e4b01557611takashi#endif
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#define READ_BUF_SIZE 10240
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#define WRITE_BUF_SIZE 10240
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#define LINE_BUF_SIZE 2048
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic apr_file_t *errfile;
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic const char *shortname = "logresolve";
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic apr_hash_t *cache;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun/* Statistics */
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic int cachehits = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic int cachesize = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic int entries = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic int resolves = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic int withname = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic int doublefailed = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic int noreverse = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun/*
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * prints various statistics to output
e1e8390280254f7f0580d701e583f670643d4f3fnilgun */
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#define NL APR_EOL_STR
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic void print_statistics (apr_file_t *output)
e1e8390280254f7f0580d701e583f670643d4f3fnilgun{
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, "logresolve Statistics:" NL);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, "Entries: %d" NL, entries);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, " With name : %d" NL, withname);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, " Resolves : %d" NL, resolves);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun if (noreverse) {
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, " - No reverse : %d" NL,
e1e8390280254f7f0580d701e583f670643d4f3fnilgun noreverse);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun }
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun if (doublefailed) {
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, " - Double lookup failed : %d" NL,
e1e8390280254f7f0580d701e583f670643d4f3fnilgun doublefailed);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun }
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, "Cache hits : %d" NL, cachehits);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(output, "Cache size : %d" NL, cachesize);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun}
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun/*
e1e8390280254f7f0580d701e583f670643d4f3fnilgun * usage info
e1e8390280254f7f0580d701e583f670643d4f3fnilgun */
e1e8390280254f7f0580d701e583f670643d4f3fnilgunstatic void usage(void)
e1e8390280254f7f0580d701e583f670643d4f3fnilgun{
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_printf(errfile,
e1e8390280254f7f0580d701e583f670643d4f3fnilgun "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
e1e8390280254f7f0580d701e583f670643d4f3fnilgun "Usage: %s [-s STATFILE] [-c]" NL
e1e8390280254f7f0580d701e583f670643d4f3fnilgun NL
e1e8390280254f7f0580d701e583f670643d4f3fnilgun "Options:" NL
e1e8390280254f7f0580d701e583f670643d4f3fnilgun " -s Record statistics to STATFILE when finished." NL
e1e8390280254f7f0580d701e583f670643d4f3fnilgun NL
e1e8390280254f7f0580d701e583f670643d4f3fnilgun " -c Perform double lookups when resolving IP addresses." NL,
e1e8390280254f7f0580d701e583f670643d4f3fnilgun shortname, shortname);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun exit(1);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun}
e1e8390280254f7f0580d701e583f670643d4f3fnilgun#undef NL
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgunint main(int argc, const char * const argv[])
e1e8390280254f7f0580d701e583f670643d4f3fnilgun{
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_t * outfile;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_t * infile;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_getopt_t * o;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_pool_t * pool;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_pool_t *pline;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_status_t status;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun const char * arg;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun char * stats = NULL;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun char * inbuffer;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun char * outbuffer;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun char line[LINE_BUF_SIZE];
e1e8390280254f7f0580d701e583f670643d4f3fnilgun int doublelookups = 0;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
2704de98885368683621b01c8f8f4e4b01557611takashi if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
2704de98885368683621b01c8f8f4e4b01557611takashi return 1;
2704de98885368683621b01c8f8f4e4b01557611takashi }
2704de98885368683621b01c8f8f4e4b01557611takashi atexit(apr_terminate);
2704de98885368683621b01c8f8f4e4b01557611takashi
2704de98885368683621b01c8f8f4e4b01557611takashi if (argc) {
2704de98885368683621b01c8f8f4e4b01557611takashi shortname = apr_filepath_name_get(argv[0]);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun }
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
e1e8390280254f7f0580d701e583f670643d4f3fnilgun return 1;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun }
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_file_open_stderr(&errfile, pool);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun apr_getopt_init(&o, pool, argc, argv);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun while (1) {
e1e8390280254f7f0580d701e583f670643d4f3fnilgun char opt;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun status = apr_getopt(o, "s:c", &opt, &arg);
e1e8390280254f7f0580d701e583f670643d4f3fnilgun if (status == APR_EOF) {
9c1260efa52c82c2a58e5b5f20cd6902563d95f5rbowen break;
e1e8390280254f7f0580d701e583f670643d4f3fnilgun }
e1e8390280254f7f0580d701e583f670643d4f3fnilgun else if (status != APR_SUCCESS) {
usage();
}
else {
switch (opt) {
case 'c':
if (doublelookups) {
usage();
}
doublelookups = 1;
break;
case 's':
if (stats) {
usage();
}
stats = apr_pstrdup(pool, arg);
break;
} /* switch */
} /* else */
} /* while */
apr_file_open_stdout(&outfile, pool);
apr_file_open_stdin(&infile, pool);
/* Allocate two new 10k file buffers */
if ((outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL ||
(inbuffer = apr_palloc(pool, READ_BUF_SIZE)) == NULL) {
return 1;
}
/* Set the buffers */
apr_file_buffer_set(infile, inbuffer, READ_BUF_SIZE);
apr_file_buffer_set(outfile, outbuffer, WRITE_BUF_SIZE);
cache = apr_hash_make(pool);
if(apr_pool_create(&pline, pool) != APR_SUCCESS){
return 1;
}
while (apr_file_gets(line, sizeof(line), infile) == APR_SUCCESS) {
char *hostname;
char *space;
apr_sockaddr_t *ip;
apr_sockaddr_t *ipdouble;
char dummy[] = " " APR_EOL_STR;
if (line[0] == '\0') {
continue;
}
/* Count our log entries */
entries++;
/* Check if this could even be an IP address */
if (!apr_isxdigit(line[0]) && line[0] != ':') {
withname++;
apr_file_puts(line, outfile);
continue;
}
/* Terminate the line at the next space */
if ((space = strchr(line, ' ')) != NULL) {
*space = '\0';
}
else {
space = dummy;
}
/* See if we have it in our cache */
hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
if (hostname) {
apr_file_printf(outfile, "%s %s", hostname, space + 1);
cachehits++;
continue;
}
/* Parse the IP address */
status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline);
if (status != APR_SUCCESS) {
/* Not an IP address */
withname++;
*space = ' ';
apr_file_puts(line, outfile);
continue;
}
/* This does not make much sense, but historically "resolves" means
* "parsed as an IP address". It does not mean we actually resolved
* the IP address into a hostname.
*/
resolves++;
/* From here on our we cache each result, even if it was not
* succesful
*/
cachesize++;
/* Try and perform a reverse lookup */
status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
if (status || hostname == NULL) {
/* Could not perform a reverse lookup */
*space = ' ';
apr_file_puts(line, outfile);
noreverse++;
/* Add to cache */
*space = '\0';
apr_hash_set(cache, line, APR_HASH_KEY_STRING,
apr_pstrdup(pool, line));
continue;
}
/* Perform a double lookup */
if (doublelookups) {
/* Do a forward lookup on our hostname, and see if that matches our
* original IP address.
*/
status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
0, pline);
if (status == APR_SUCCESS ||
memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
/* Double-lookup failed */
*space = ' ';
apr_file_puts(line, outfile);
doublefailed++;
/* Add to cache */
*space = '\0';
apr_hash_set(cache, line, APR_HASH_KEY_STRING,
apr_pstrdup(pool, line));
continue;
}
}
/* Outout the resolved name */
apr_file_printf(outfile, "%s %s", hostname, space + 1);
/* Store it in the cache */
apr_hash_set(cache, line, APR_HASH_KEY_STRING,
apr_pstrdup(pool, hostname));
apr_pool_clear(pline);
}
/* Flush any remaining output */
apr_file_flush(outfile);
if (stats) {
apr_file_t *statsfile;
if (apr_file_open(&statsfile, stats,
APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
APR_OS_DEFAULT, pool) != APR_SUCCESS) {
apr_file_printf(errfile, "%s: Could not open %s for writing.",
shortname, stats);
return 1;
}
print_statistics(statsfile);
apr_file_close(statsfile);
}
return 0;
}