logresolve.c revision 09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1
/*
 * logresolve 1.1
 *
 * Tom Rathborne - tomr@uunet.ca - http://www.uunet.ca/~tomr/
 * UUNET Canada, April 16, 1995
 *
 * Rewritten by David Robinson. (drtr@ast.cam.ac.uk)
 *
 * Usage: logresolve [-s filename] [-c] < access_log > new_log
 *
 * Arguments:
 *    -s filename     name of a file to record statistics
 *    -c              check the DNS for a matching A record for the host.
 *
 * Notes:
 *
 * To generate meaningful statistics from an HTTPD log file, it's good
 * to have the domain name of each machine that accessed your site, but
 * doing this on the fly can slow HTTPD down.
 *
 * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
 * resolution off. Before running your stats program, just run your log
 * file through this program (logresolve) and all of your IP numbers will
 * be resolved into hostnames (where possible).
 *
 * logresolve takes an HTTPD access log (in the COMMON log file format,
 * or any other format that has the IP number/domain name as the first
 * field for that matter), and outputs the same file with all of the
 * domain names looked up. Where no domain name can be found, the IP
 * number is left in.
 *
 * To minimize impact on your nameserver, logresolve has its very own
 * internal hash-table cache. This means that each IP number will only
 * be looked up the first time it is found in the log file.
 *
 * The -c option causes logresolve to apply the same check as httpd
 * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
 * address, it looks up the IP addresses for the hostname and checks
 * that one of these matches the original address.
 */
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf#include "ap_config.h"
135c1278adef96d36fd421c536b1acc54a341cfbsf#include <sys/types.h>
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf#include <ctype.h>
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf#ifndef MPE
135c1278adef96d36fd421c536b1acc54a341cfbsf#include <arpa/inet.h>
135c1278adef96d36fd421c536b1acc54a341cfbsf#endif
135c1278adef96d36fd421c536b1acc54a341cfbsf
/* forward declarations for the helpers defined later in this file */
static void cgethost(struct in_addr ipnum, char *string, int check);
static int getline(char *s, int n);
static void stats(FILE *output);


/* maximum line length (size of the buffer handed to getline) */
#define MAXLINE 1024

/* maximum length of a domain name; only defined here if no header did */
#ifndef MAXDNAME
#define MAXDNAME 256
#endif

/* number of buckets in cache hash table */
#define BUCKETS 256
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf#if defined(NEED_STRDUP)
/*
 * strdup - fallback for platforms that need one (NEED_STRDUP)
 *
 * Returns a freshly malloc()ed copy of str, including its terminating
 * NUL, or NULL if the allocation fails.  The caller owns the result and
 * releases it with free().
 */
char *strdup (const char *str)
{
    size_t len = strlen(str) + 1;       /* +1 for the terminating NUL */
    char *dup = malloc(len);

    if (dup == NULL)
        return NULL;
    /* length is already known, so copy it in one go */
    memcpy(dup, str, len);

    return dup;
}
135c1278adef96d36fd421c536b1acc54a341cfbsf#endif
135c1278adef96d36fd421c536b1acc54a341cfbsf
/*
 * struct nsrec - record of nameservice for cache linked list
 *
 * ipnum    - IP number
 * hostname - hostname (or the dotted IP number when no name was found)
 * noname   - nonzero if IP number has no hostname, i.e. hostname=IP number;
 *            the value is the error code recorded for the failed lookup
 * next     - next record in the same hash bucket, or NULL
 *
 * nscache is the hash table itself: one singly linked chain per bucket.
 */

struct nsrec {
    struct in_addr ipnum;
    char *hostname;
    int noname;
    struct nsrec *next;
} *nscache[BUCKETS];
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf/*
135c1278adef96d36fd421c536b1acc54a341cfbsf * statistics - obvious
135c1278adef96d36fd421c536b1acc54a341cfbsf */
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf#ifndef h_errno
135c1278adef96d36fd421c536b1acc54a341cfbsfextern int h_errno; /* some machines don't have this in their headers */
135c1278adef96d36fd421c536b1acc54a341cfbsf#endif
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf/* largeste value for h_errno */
135c1278adef96d36fd421c536b1acc54a341cfbsf#define MAX_ERR (NO_ADDRESS)
135c1278adef96d36fd421c536b1acc54a341cfbsf#define UNKNOWN_ERR (MAX_ERR+1)
135c1278adef96d36fd421c536b1acc54a341cfbsf#define NO_REVERSE (MAX_ERR+2)
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int cachehits = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int cachesize = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int entries = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int resolves = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int withname = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int errors[MAX_ERR + 3];
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf/*
135c1278adef96d36fd421c536b1acc54a341cfbsf * cgethost - gets hostname by IP address, caching, and adding unresolvable
135c1278adef96d36fd421c536b1acc54a341cfbsf * IP numbers with their IP number as hostname, setting noname flag
135c1278adef96d36fd421c536b1acc54a341cfbsf */
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic void cgethost (struct in_addr ipnum, char *string, int check)
135c1278adef96d36fd421c536b1acc54a341cfbsf{
135c1278adef96d36fd421c536b1acc54a341cfbsf struct nsrec **current, *new;
135c1278adef96d36fd421c536b1acc54a341cfbsf struct hostent *hostdata;
135c1278adef96d36fd421c536b1acc54a341cfbsf char *name;
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
135c1278adef96d36fd421c536b1acc54a341cfbsf (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
135c1278adef96d36fd421c536b1acc54a341cfbsf current = &(*current)->next;
135c1278adef96d36fd421c536b1acc54a341cfbsf
135c1278adef96d36fd421c536b1acc54a341cfbsf if (*current == NULL) {
135c1278adef96d36fd421c536b1acc54a341cfbsf cachesize++;
135c1278adef96d36fd421c536b1acc54a341cfbsf new = (struct nsrec *) malloc(sizeof(struct nsrec));
135c1278adef96d36fd421c536b1acc54a341cfbsf if (new == NULL) {
135c1278adef96d36fd421c536b1acc54a341cfbsf perror("malloc");
135c1278adef96d36fd421c536b1acc54a341cfbsf fprintf(stderr, "Insufficient memory\n");
135c1278adef96d36fd421c536b1acc54a341cfbsf exit(1);
135c1278adef96d36fd421c536b1acc54a341cfbsf }
135c1278adef96d36fd421c536b1acc54a341cfbsf *current = new;
135c1278adef96d36fd421c536b1acc54a341cfbsf new->next = NULL;
new->ipnum = ipnum;
hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
AF_INET);
if (hostdata == NULL) {
if (h_errno > MAX_ERR)
errors[UNKNOWN_ERR]++;
else
errors[h_errno]++;
new->noname = h_errno;
name = strdup(inet_ntoa(ipnum));
}
else {
new->noname = 0;
name = strdup(hostdata->h_name);
if (check) {
if (name == NULL) {
perror("strdup");
fprintf(stderr, "Insufficient memory\n");
exit(1);
}
hostdata = gethostbyname(name);
if (hostdata != NULL) {
char **hptr;
for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
break;
if (*hptr == NULL)
hostdata = NULL;
}
if (hostdata == NULL) {
fprintf(stderr, "Bad host: %s != %s\n", name,
inet_ntoa(ipnum));
new->noname = NO_REVERSE;
free(name);
name = strdup(inet_ntoa(ipnum));
errors[NO_REVERSE]++;
}
}
}
new->hostname = name;
if (new->hostname == NULL) {
perror("strdup");
fprintf(stderr, "Insufficient memory\n");
exit(1);
}
}
else
cachehits++;
/* size of string == MAXDNAME +1 */
strncpy(string, (*current)->hostname, MAXDNAME);
string[MAXDNAME] = '\0';
}
/*
 * stats - prints various statistics to output
 *
 * Writes the global counters, the non-zero error counts, and then one line
 * per cached record: bucket number, IP number, and either the hostname or
 * the reason the lookup failed.
 */
static void stats (FILE *output)
{
    int i;
    char *ipstring;
    struct nsrec *current;
    const char *errstring[MAX_ERR + 3];

    /* default every slot, then name the ones we know */
    for (i = 0; i < MAX_ERR + 3; i++)
        errstring[i] = "Unknown error";
    errstring[HOST_NOT_FOUND] = "Host not found";
    errstring[TRY_AGAIN] = "Try again";
    errstring[NO_RECOVERY] = "Non recoverable error";
    errstring[NO_DATA] = "No data record";
    errstring[NO_ADDRESS] = "No address";
    errstring[NO_REVERSE] = "No reverse entry";

    fprintf(output, "logresolve Statistics:\n");

    fprintf(output, "Entries: %d\n", entries);
    fprintf(output, " With name : %d\n", withname);
    fprintf(output, " Resolves : %d\n", resolves);
    if (errors[HOST_NOT_FOUND])
        fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
    if (errors[TRY_AGAIN])
        fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
    if (errors[NO_DATA])
        fprintf(output, " - No data : %d\n", errors[NO_DATA]);
    if (errors[NO_ADDRESS])
        fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
    if (errors[NO_REVERSE])
        fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);

    fprintf(output, "Cache hits : %d\n", cachehits);
    fprintf(output, "Cache size : %d\n", cachesize);
    fprintf(output, "Cache buckets : IP number * hostname\n");

    for (i = 0; i < BUCKETS; i++) {
        for (current = nscache[i]; current != NULL; current = current->next) {
            ipstring = inet_ntoa(current->ipnum);
            if (current->noname == 0) {
                fprintf(output, " %3d %15s - %s\n", i, ipstring,
                        current->hostname);
            }
            else if (current->noname < 0 || current->noname > MAX_ERR + 2) {
                /*
                 * noname is a stored error code and may lie outside
                 * errstring[] on either side (a negative h_errno value, for
                 * instance), so both bounds are checked before indexing.
                 */
                fprintf(output, " %3d %15s : Unknown error\n", i,
                        ipstring);
            }
            else {
                fprintf(output, " %3d %15s : %s\n", i, ipstring,
                        errstring[current->noname]);
            }
        }
    }
}
/*
 * getline - read the next line of standard input into s
 *
 * At most n - 1 characters are stored (fgets semantics).  If a newline was
 * read it is replaced by the terminating NUL.  Returns 1 when something was
 * read and 0 when fgets() reports end of file or an error.
 */
static int getline (char *s, int n)
{
    if (fgets(s, n, stdin) == NULL) {
        return 0;
    }

    /* strcspn stops at the first '\n' or at the existing terminator */
    s[strcspn(s, "\n")] = '\0';

    return 1;
}
/*
 * main - filter a log from stdin to stdout, resolving leading IP numbers
 *
 * Options:
 *   -c           verify each resolved name by looking it up again
 *   -s statfile  write statistics to statfile when the input is exhausted
 *
 * For every non-empty input line the text up to the first space is taken as
 * the host field.  Lines that do not start with a digit, or whose host field
 * inet_addr() rejects, are copied through unchanged; otherwise the field is
 * replaced by the result of cgethost().
 *
 * Returns 0 on success; exits with status 1 on a usage error or if the
 * statistics file cannot be written.
 */
int main (int argc, char *argv[])
{
    struct in_addr ipnum;
    char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
    int i, check;

    check = 0;
    statfile = NULL;
    for (i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-c") == 0)
            check = 1;
        else if (strcmp(argv[i], "-s") == 0) {
            if (i == argc - 1) {
                fprintf(stderr, "logresolve: missing filename to -s\n");
                exit(1);
            }
            i++;
            statfile = argv[i];
        }
        else {
            fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
            /* an unrecognised argument is a failure, so report it as one */
            exit(1);
        }
    }

    for (i = 0; i < BUCKETS; i++)
        nscache[i] = NULL;
    /* errors[] has MAX_ERR + 3 slots; clear all of them, NO_REVERSE included */
    for (i = 0; i < MAX_ERR + 3; i++)
        errors[i] = 0;

    while (getline(line, MAXLINE)) {
        if (line[0] == '\0')
            continue;

        entries++;
        /* <ctype.h> functions require a value representable as unsigned char */
        if (!isdigit((unsigned char) line[0])) {  /* short cut */
            puts(line);
            withname++;
            continue;
        }

        /* temporarily cut the line after the host field */
        bar = strchr(line, ' ');
        if (bar != NULL)
            *bar = '\0';
        ipnum.s_addr = inet_addr(line);
        if (ipnum.s_addr == 0xffffffffu) {
            /* not a usable IP number: restore the line and pass it through */
            if (bar != NULL)
                *bar = ' ';
            puts(line);
            withname++;
            continue;
        }

        resolves++;

        cgethost(ipnum, hoststring, check);
        if (bar != NULL)
            printf("%s %s\n", hoststring, bar + 1);
        else
            puts(hoststring);
    }

    if (statfile != NULL) {
        FILE *fp;

        fp = fopen(statfile, "w");
        if (fp == NULL) {
            fprintf(stderr, "logresolve: could not open statistics file '%s'\n"
                    ,statfile);
            exit(1);
        }
        stats(fp);
        /* buffered output is flushed here, so a failure means lost data */
        if (fclose(fp) != 0) {
            fprintf(stderr, "logresolve: error writing statistics file '%s'\n",
                    statfile);
            exit(1);
        }
    }

    return (0);
}