logresolve.c revision bb65aeae7af1d33b64252bbc1b966942d757ac60
842ae4bd224140319ae7feec1872b93dfd491143fielding * logresolve 1.1
842ae4bd224140319ae7feec1872b93dfd491143fielding * Tom Rathborne - tomr@uunet.ca - http://www.uunet.ca/~tomr/
842ae4bd224140319ae7feec1872b93dfd491143fielding * UUNET Canada, April 16, 1995
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Rewritten by David Robinson. (drtr@ast.cam.ac.uk)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Usage: logresolve [-s filename] [-c] < access_log > new_log
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Arguments:
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * -s filename name of a file to record statistics
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * -c check the DNS for a matching A record for the host.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * To generate meaningful statistics from an HTTPD log file, it's good
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * to have the domain name of each machine that accessed your site, but
e8f95a682820a599fe41b22977010636be5c2717jim * doing this on the fly can slow HTTPD down.
e8f95a682820a599fe41b22977010636be5c2717jim * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
1747d30b98aa1bdbc43994c02cd46ab4cb9319e4fielding * resolution off. Before running your stats program, just run your log
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * file through this program (logresolve) and all of your IP numbers will
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * be resolved into hostnames (where possible).
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * logresolve takes an HTTPD access log (in the COMMON log file format,
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * or any other format that has the IP number/domain name as the first
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * field for that matter), and outputs the same file with all of the
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * domain names looked up. Where no domain name can be found, the IP
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * number is left in.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * To minimize impact on your nameserver, logresolve has its very own
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * internal hash-table cache. This means that each IP number will only
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * be looked up the first time it is found in the log file.
5c0419d51818eb02045cf923a9fe456127a44c60wrowe * The -c option causes logresolve to apply the same check as httpd
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * address, it looks up the IP addresses for the hostname and checks
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * that one of these matches the original address.
87587593f1a53030e840acc0dec6cc881022ea40covenerstatic void cgethost(struct in_addr ipnum, char *string, int check);
87587593f1a53030e840acc0dec6cc881022ea40covenerstatic int getline(char *s, int n);
0568280364eb026393be492ebc732795c4934643jorton/* maximum line length */
0568280364eb026393be492ebc732795c4934643jorton/* maximum length of a domain name */
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener/* number of buckets in cache hash table */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * struct nsrec - record of nameservice for cache linked list
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * ipnum - IP number; hostname - hostname; noname - nonzero if IP number has no
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * hostname, i.e. hostname=IP number
95b6fe1346805e1731e6e97c15d569c73be22cf7minfrin * statistics - obvious
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesextern int h_errno; /* some machines don't have this in their headers */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes/* largest value for h_errno */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int cachehits = 0;
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int cachesize = 0;
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int entries = 0;
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int resolves = 0;
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int withname = 0;
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * cgethost - gets hostname by IP address, caching, and adding unresolvable
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * IP numbers with their IP number as hostname, setting noname flag
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic void cgethost (struct in_addr ipnum, char *string, int check)
f43b67c5a9d29b572eac916f8335cedc80c908bebnicholes current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes new = (struct nsrec *) malloc(sizeof(struct nsrec));
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* size of string == MAXDNAME +1 */
f2be127030aa4190033084f0a6add531c9bc41desf * prints various statistics to output
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
6683642c1e0032eeeed5f99e8c14880692ef84c5sf fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener fprintf(output, " - No data : %d\n", errors[NO_DATA]);
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener fprintf(output, "Cache buckets : IP number * hostname\n");
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener for (i = 0; i < BUCKETS; i++)
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener for (current = nscache[i]; current != NULL; current = current->next) {
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener * gets a line from stdin
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovenerstatic int getline (char *s, int n)
4e9c24785b525d2956e6e381015c0f2bd0a72f4bcovener return (0);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
465bb68501690d7a47bfd2a6129580047d76d8f1rederpj /* If we apr'ify this code, apr_create_pool/apr_destroy_pool
e8f95a682820a599fe41b22977010636be5c2717jim * should perform the WSAStartup/WSACleanup for us.
d330a801b1e5d63a4b8b4fd431542ad0903fd71bbnicholes fprintf(stderr, "logresolve: missing filename to -s\n");
513b324e774c559b579896df131fd7c8471ed529rederpj fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
513b324e774c559b579896df131fd7c8471ed529rederpj for (i = 0; i < BUCKETS; i++)
d330a801b1e5d63a4b8b4fd431542ad0903fd71bbnicholes fprintf(stderr, "logresolve: could not open statistics file '%s'\n"