logresolve.c revision 0b4b04d8621478ba59f0a6ba2950ddc02ab92b58
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin/* Copyright 1999-2005 The Apache Software Foundation or its licensors, as
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * applicable.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Licensed under the Apache License, Version 2.0 (the "License");
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * you may not use this file except in compliance with the License.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * You may obtain a copy of the License at
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Unless required by applicable law or agreed to in writing, software
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * distributed under the License is distributed on an "AS IS" BASIS,
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * See the License for the specific language governing permissions and
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * limitations under the License.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * logresolve 2.0
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * UUNET Canada, April 16, 1995
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Rewritten again, and ported to APR by Colm MacCarthaigh
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Usage: logresolve [-s filename] [-c] < access_log > new_log
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Arguments:
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * -s filename name of a file to record statistics
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * -c check the DNS for a matching A record for the host.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Notes: (For historical interest)
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * To generate meaningful statistics from an HTTPD log file, it's good
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * to have the domain name of each machine that accessed your site, but
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * doing this on the fly can slow HTTPD down.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * resolution off. Before running your stats program, just run your log
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * file through this program (logresolve) and all of your IP numbers will
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * be resolved into hostnames (where possible).
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * logresolve takes an HTTPD access log (in the COMMON log file format,
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * or any other format that has the IP number/domain name as the first
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * field for that matter), and outputs the same file with all of the
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * domain names looked up. Where no domain name can be found, the IP
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * number is left in.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * To minimize impact on your nameserver, logresolve has its very own
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * internal hash-table cache. This means that each IP number will only
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * be looked up the first time it is found in the log file.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * The -c option causes logresolve to apply the same check as httpd
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * address, it looks up the IP addresses for the hostname and checks
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * that one of these matches the original address.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin/* Statistics */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic int cachehits = 0; /* reported as "Cache hits" in the statistics output */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic int cachesize = 0; /* reported as "Cache size" in the statistics output */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic int entries = 0; /* presumably the number of log entries read; the increment is not visible here - TODO confirm */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic int resolves = 0; /* reported as "Resolves"; a comment later in this file says this historically means "parsed as an IP address", not "resolved to a hostname" */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic int withname = 0; /* reported as "With name" in the statistics output */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic int doublefailed = 0; /* presumably the value printed under "Double lookup failed" - TODO confirm */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic int noreverse = 0; /* presumably counts addresses whose reverse lookup failed - TODO confirm */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * prints various statistics to output
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(output, "logresolve Statistics:" NL);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(output, " With name : %d" NL, withname);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(output, " Resolves : %d" NL, resolves);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(output, " - Double lookup failed : %d" NL,
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(output, "Cache hits : %d" NL, cachehits);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(output, "Cache size : %d" NL, cachesize);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * usage info
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrinstatic void usage(void)
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin " -s Record statistics to STATFILE when finished." NL
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin " -c Perform double lookups when resolving IP addresses." NL,
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin const char * arg;
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin while (1) {
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin } /* switch */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin } /* else */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin } /* while */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Allocate two new 10k file buffers */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin if ((outbuffer = apr_palloc(pool, 10240)) == NULL ||
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Set the buffers */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin while(apr_file_gets(line, 2048, infile) == APR_SUCCESS) {
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Count our log entries */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Check if this could even be an IP address */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Terminate the line at the next space */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* See if we have it in our cache */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin hostname = (char *) apr_hash_get(cache, (const void *)line,
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(outfile, "%s %s", hostname, space + 1);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Parse the IP address */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC ,0, 0, pool);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Not an IP address */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* This does not make much sense, but historically "resolves" means
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * "parsed as an IP address". It does not mean we actually resolved
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * the IP address into a hostname.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* From here on out we cache each result, even if it was not
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * successful
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Try and perform a reverse lookup */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Could not perform a reverse lookup */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Add to cache */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_hash_set(cache, (const void *) line, strlen(line),
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Perform a double lookup */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Do a forward lookup on our hostname, and see if that matches our
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin * original IP address.
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Double-lookup failed */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Add to cache */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_hash_set(cache, (const void *) line, strlen(line),
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Output the resolved name */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(outfile, "%s %s", hostname, space + 1);
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Store it in the cache */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_hash_set(cache, (const void *) line, strlen(line),
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin /* Flush any remaining output */
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
fd279fe992f7171dc3f6d4d40d6db5bb74f2d96eminfrin apr_file_printf(errfile, "%s: Could not open %s for writing.",