logresolve.c revision 0fdce60d89493753aeeadd6c26755143515f3de6
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl/* Copyright 1999-2004 The Apache Software Foundation
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * Licensed under the Apache License, Version 2.0 (the "License");
c362cf8bdeb690d43aca572eedf3343e8726b961nd * you may not use this file except in compliance with the License.
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * You may obtain a copy of the License at
031b91a62d25106ae69d4693475c79618dd5e884fielding * Unless required by applicable law or agreed to in writing, software
031b91a62d25106ae69d4693475c79618dd5e884fielding * distributed under the License is distributed on an "AS IS" BASIS,
031b91a62d25106ae69d4693475c79618dd5e884fielding * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
031b91a62d25106ae69d4693475c79618dd5e884fielding * See the License for the specific language governing permissions and
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * limitations under the License.
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * logresolve 1.1
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * UUNET Canada, April 16, 1995
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * Usage: logresolve [-s filename] [-c] < access_log > new_log
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * Arguments:
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * -s filename name of a file to record statistics
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * -c check the DNS for a matching A record for the host.
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * To generate meaningful statistics from an HTTPD log file, it's good
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * to have the domain name of each machine that accessed your site, but
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * doing this on the fly can slow HTTPD down.
955504c59e54738aafe056e88240dddcfd70fc78covener * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * resolution off. Before running your stats program, just run your log
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * file through this program (logresolve) and all of your IP numbers will
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * be resolved into hostnames (where possible).
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung * logresolve takes an HTTPD access log (in the COMMON log file format,
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * or any other format that has the IP number/domain name as the first
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * field for that matter), and outputs the same file with all of the
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung * domain names looked up. Where no domain name can be found, the IP
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * number is left in.
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung * To minimize impact on your nameserver, logresolve has its very own
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung * internal hash-table cache. This means that each IP number will only
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * be looked up the first time it is found in the log file.
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * The -c option causes logresolve to apply the same check as httpd
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * address, it looks up the IP addresses for the hostname and checks
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * that one of these matches the original address.
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslstatic void cgethost(struct in_addr ipnum, char *string, int check);
c3ea2f06571c877091cd2f1016e47b1d5660df9drjungstatic int get_line(char *s, int n);
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl/* maximum line length */
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl/* maximum length of a domain name */
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl/* number of buckets in cache hash table */
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * struct nsrec - record of nameservice for cache linked list
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * ipnum - IP number; hostname - hostname; noname - nonzero if IP number has no
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung * hostname, i.e. hostname=IP number
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * statistics - obvious
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslextern int h_errno; /* some machines don't have this in their headers */
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl/* largest value for h_errno */
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslstatic int cachehits = 0;
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslstatic int cachesize = 0;
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslstatic int entries = 0;
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslstatic int resolves = 0;
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslstatic int withname = 0;
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * cgethost - gets hostname by IP address, caching, and adding unresolvable
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * IP numbers with their IP number as hostname, setting noname flag
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jslstatic void cgethost (struct in_addr ipnum, char *string, int check)
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl /* size of string == MAXDNAME +1 */
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * prints various statistics to output
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl fprintf(output, "Cache buckets : IP number * hostname\n");
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl for (i = 0; i < BUCKETS; i++)
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl for (current = nscache[i]; current != NULL; current = current->next) {
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung * gets a line from stdin
c3ea2f06571c877091cd2f1016e47b1d5660df9drjungstatic int get_line (char *s, int n)
c3ea2f06571c877091cd2f1016e47b1d5660df9drjung return (0);
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl return (1);
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl /* If we apr'ify this code, apr_pool_create/apr_pool_destroy
8a5db6a3d29e28627bf8a6d6f7066e840da6bdf2jsl * should perform the WSAStartup/WSACleanup for us.
check = 0;
exit(0);
for (i = 0; i < BUCKETS; i++)
errors[i] = 0;
entries++;
withname++;
withname++;
resolves++;
WSACleanup();
,statfile);