logresolve.c revision 09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1
135c1278adef96d36fd421c536b1acc54a341cfbsf * logresolve 1.1
135c1278adef96d36fd421c536b1acc54a341cfbsf * Tom Rathborne - tomr@uunet.ca - http://www.uunet.ca/~tomr/
135c1278adef96d36fd421c536b1acc54a341cfbsf * UUNET Canada, April 16, 1995
135c1278adef96d36fd421c536b1acc54a341cfbsf * Rewritten by David Robinson. (drtr@ast.cam.ac.uk)
135c1278adef96d36fd421c536b1acc54a341cfbsf * Usage: logresolve [-s filename] [-c] < access_log > new_log
135c1278adef96d36fd421c536b1acc54a341cfbsf * Arguments:
135c1278adef96d36fd421c536b1acc54a341cfbsf * -s filename : name of a file to record statistics
135c1278adef96d36fd421c536b1acc54a341cfbsf * -c : check the DNS for a matching A record for the host.
135c1278adef96d36fd421c536b1acc54a341cfbsf * To generate meaningful statistics from an HTTPD log file, it's good
135c1278adef96d36fd421c536b1acc54a341cfbsf * to have the domain name of each machine that accessed your site, but
135c1278adef96d36fd421c536b1acc54a341cfbsf * doing this on the fly can slow HTTPD down.
135c1278adef96d36fd421c536b1acc54a341cfbsf * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
135c1278adef96d36fd421c536b1acc54a341cfbsf * resolution off. Before running your stats program, just run your log
135c1278adef96d36fd421c536b1acc54a341cfbsf * file through this program (logresolve) and all of your IP numbers will
135c1278adef96d36fd421c536b1acc54a341cfbsf * be resolved into hostnames (where possible).
135c1278adef96d36fd421c536b1acc54a341cfbsf * logresolve takes an HTTPD access log (in the COMMON log file format,
135c1278adef96d36fd421c536b1acc54a341cfbsf * or any other format that has the IP number/domain name as the first
135c1278adef96d36fd421c536b1acc54a341cfbsf * field for that matter), and outputs the same file with all of the
135c1278adef96d36fd421c536b1acc54a341cfbsf * domain names looked up. Where no domain name can be found, the IP
135c1278adef96d36fd421c536b1acc54a341cfbsf * number is left in.
135c1278adef96d36fd421c536b1acc54a341cfbsf * To minimize impact on your nameserver, logresolve has its very own
135c1278adef96d36fd421c536b1acc54a341cfbsf * internal hash-table cache. This means that each IP number will only
135c1278adef96d36fd421c536b1acc54a341cfbsf * be looked up the first time it is found in the log file.
135c1278adef96d36fd421c536b1acc54a341cfbsf * The -c option causes logresolve to apply the same check as httpd
135c1278adef96d36fd421c536b1acc54a341cfbsf * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
135c1278adef96d36fd421c536b1acc54a341cfbsf * address, it looks up the IP addresses for the hostname and checks
135c1278adef96d36fd421c536b1acc54a341cfbsf * that one of these matches the original address.
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic void cgethost(struct in_addr ipnum, char *string, int check);
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int getline(char *s, int n);
135c1278adef96d36fd421c536b1acc54a341cfbsf/* maximum line length */
135c1278adef96d36fd421c536b1acc54a341cfbsf/* maximum length of a domain name */
135c1278adef96d36fd421c536b1acc54a341cfbsf/* number of buckets in cache hash table */
135c1278adef96d36fd421c536b1acc54a341cfbsf * struct nsrec - record of nameservice for cache linked list
135c1278adef96d36fd421c536b1acc54a341cfbsf * ipnum - IP number; hostname - hostname; noname - nonzero if IP number has no
135c1278adef96d36fd421c536b1acc54a341cfbsf * hostname, i.e. hostname=IP number
135c1278adef96d36fd421c536b1acc54a341cfbsf * statistics - obvious
135c1278adef96d36fd421c536b1acc54a341cfbsfextern int h_errno; /* some machines don't have this in their headers */
135c1278adef96d36fd421c536b1acc54a341cfbsf/* largest value for h_errno */
/*
 * File-scope counters, all starting at zero. cachehits, entries, withname
 * and resolves are incremented elsewhere in this file.
 * NOTE(review): the precise meaning of each counter (e.g. cachehits =
 * lookups answered from the cache, cachesize = number of cached records)
 * is inferred from the names only -- confirm against the code that updates
 * and reports them.
 */
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int cachehits = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int cachesize = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int entries = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int resolves = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic int withname = 0;
135c1278adef96d36fd421c536b1acc54a341cfbsf * cgethost - gets hostname by IP address, caching, and adding unresolvable
135c1278adef96d36fd421c536b1acc54a341cfbsf * IP numbers with their IP number as hostname, setting noname flag
135c1278adef96d36fd421c536b1acc54a341cfbsfstatic void cgethost (struct in_addr ipnum, char *string, int check)
135c1278adef96d36fd421c536b1acc54a341cfbsf current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
135c1278adef96d36fd421c536b1acc54a341cfbsf (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
135c1278adef96d36fd421c536b1acc54a341cfbsf while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
AF_INET);
if (check) {
char **hptr;
cachehits++;
char *ipstring;
for (i = 0; i < BUCKETS; i++)
ipstring);
static int getline (char *s, int n)
char *cp;
if (cp)
int i, check;
check = 0;
exit(0);
for (i = 0; i < BUCKETS; i++)
errors[i] = 0;
entries++;
withname++;
withname++;
resolves++;
,statfile);