logresolve.c revision 2b672ae3a6d190fb62d04f4f47bbdc0a2bde151f
08cb74ca432a8c24e39f17dedce527e6a47b8001jerenkrantz/* Copyright 1999-2005 The Apache Software Foundation or its licensors, as
08cb74ca432a8c24e39f17dedce527e6a47b8001jerenkrantz * applicable.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Licensed under the Apache License, Version 2.0 (the "License");
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * you may not use this file except in compliance with the License.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * You may obtain a copy of the License at
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Unless required by applicable law or agreed to in writing, software
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * distributed under the License is distributed on an "AS IS" BASIS,
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * See the License for the specific language governing permissions and
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * limitations under the License.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * logresolve 2.0
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * UUNET Canada, April 16, 1995
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Rewritten again, and ported to APR by Colm MacCarthaigh
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Usage: logresolve [-s filename] [-c] < access_log > new_log
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Arguments:
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * -s filename name of a file to record statistics
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * -c check the DNS for a matching A record for the host.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Notes: (For historical interest)
5c0419d51818eb02045cf923a9fe456127a44c60wrowe * To generate meaningful statistics from an HTTPD log file, it's good
5c0419d51818eb02045cf923a9fe456127a44c60wrowe * to have the domain name of each machine that accessed your site, but
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * doing this on the fly can slow HTTPD down.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * resolution off. Before running your stats program, just run your log
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * file through this program (logresolve) and all of your IP numbers will
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * be resolved into hostnames (where possible).
d266c3777146d36a4c23c17aad6f153aebea1bf4jorton * logresolve takes an HTTPD access log (in the COMMON log file format,
d266c3777146d36a4c23c17aad6f153aebea1bf4jorton * or any other format that has the IP number/domain name as the first
d266c3777146d36a4c23c17aad6f153aebea1bf4jorton * field for that matter), and outputs the same file with all of the
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * domain names looked up. Where no domain name can be found, the IP
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * number is left in.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * To minimize impact on your nameserver, logresolve has its very own
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * internal hash-table cache. This means that each IP number will only
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * be looked up the first time it is found in the log file.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * The -c option causes logresolve to apply the same check as httpd
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * address, it looks up the IP addresses for the hostname and checks
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * that one of these matches the original address.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes/* Statistics */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int cachehits = 0; /* reported as "Cache hits" in the statistics output */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int cachesize = 0; /* reported as "Cache size" in the statistics output */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int entries = 0; /* reported as "Entries"; presumably one per log line read -- confirm in main() */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int resolves = 0; /* reported as "Resolves"; historically means "parsed as an IP address", not "resolved to a hostname" */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int withname = 0; /* reported as "With name" in the statistics output */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int doublefailed = 0; /* presumably the value behind "Double lookup failed" -- the printf argument is not visible here */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic int noreverse = 0; /* NOTE(review): presumably counts failed reverse lookups; no use is visible in this excerpt -- confirm */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * prints various statistics to output
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes apr_file_printf(output, "logresolve Statistics:" NL);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes apr_file_printf(output, "Entries: %d" NL, entries);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes apr_file_printf(output, " With name : %d" NL, withname);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes apr_file_printf(output, " Resolves : %d" NL, resolves);
0568280364eb026393be492ebc732795c4934643jorton apr_file_printf(output, " - Double lookup failed : %d" NL,
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes apr_file_printf(output, "Cache hits : %d" NL, cachehits);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes apr_file_printf(output, "Cache size : %d" NL, cachesize);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * usage info
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesstatic void usage(void)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes " -s Record statistics to STATFILE when finished." NL
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes " -c Perform double lookups when resolving IP addresses." NL,
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes const char * arg;
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes } /* switch */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes } /* else */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes } /* while */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Allocate two new 10k file buffers */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes if ((outbuffer = apr_palloc(pool, 10240)) == NULL ||
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Set the buffers */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes while (apr_file_gets(line, 2048, infile) == APR_SUCCESS) {
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Count our log entries */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Check if this could even be an IP address */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Terminate the line at the next space */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* See if we have it in our cache */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes apr_file_printf(outfile, "%s %s", hostname, space + 1);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Parse the IP address */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC ,0, 0, pool);
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Not an IP address */
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe /* This does not make much sense, but historically "resolves" means
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe * "parsed as an IP address". It does not mean we actually resolved
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes * the IP address into a hostname.
54d22ed1c429b903b029bbd62621f11a9e286137minfrin /* From here on out we cache each result, even if it was not
54d22ed1c429b903b029bbd62621f11a9e286137minfrin /* Try and perform a reverse lookup */
54d22ed1c429b903b029bbd62621f11a9e286137minfrin status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
54d22ed1c429b903b029bbd62621f11a9e286137minfrin /* Could not perform a reverse lookup */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Add to cache */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Perform a double lookup */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Do a forward lookup on our hostname, and see if that matches our
54d22ed1c429b903b029bbd62621f11a9e286137minfrin * original IP address.
81965264d92dd8c9ca21d058420f6f6da34b3032minfrin status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
54d22ed1c429b903b029bbd62621f11a9e286137minfrin memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
54d22ed1c429b903b029bbd62621f11a9e286137minfrin /* Double-lookup failed */
e18ba90a1e610b43062e90cfa8bf0c1edcad7a49bnicholes /* Add to cache */
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes /* Output the resolved name */
54d22ed1c429b903b029bbd62621f11a9e286137minfrin apr_file_printf(outfile, "%s %s", hostname, space + 1);
54d22ed1c429b903b029bbd62621f11a9e286137minfrin /* Store it in the cache */
54d22ed1c429b903b029bbd62621f11a9e286137minfrin /* Flush any remaining output */
54d22ed1c429b903b029bbd62621f11a9e286137minfrin APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
54d22ed1c429b903b029bbd62621f11a9e286137minfrin apr_file_printf(errfile, "%s: Could not open %s for writing.",