logresolve.c revision 842ae4bd224140319ae7feec1872b93dfd491143
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding/* Licensed to the Apache Software Foundation (ASF) under one or more
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * contributor license agreements. See the NOTICE file distributed with
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * this work for additional information regarding copyright ownership.
b99dbaab171d91e1b664397cc40e039d0c087c65fielding * The ASF licenses this file to You under the Apache License, Version 2.0
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * (the "License"); you may not use this file except in compliance with
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * Unless required by applicable law or agreed to in writing, software
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * distributed under the License is distributed on an "AS IS" BASIS,
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * See the License for the specific language governing permissions and
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * limitations under the License.
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * logresolve 2.0
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * UUNET Canada, April 16, 1995
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * Rewritten again, and ported to APR by Colm MacCarthaigh
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * Usage: logresolve [-s filename] [-c] < access_log > new_log
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * Arguments:
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * -s filename name of a file to record statistics
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * -c check the DNS for a matching A record for the host.
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * Notes: (For historical interest)
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * To generate meaningful statistics from an HTTPD log file, it's good
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * to have the domain name of each machine that accessed your site, but
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * doing this on the fly can slow HTTPD down.
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * resolution off. Before running your stats program, just run your log
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * file through this program (logresolve) and all of your IP numbers will
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * be resolved into hostnames (where possible).
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * logresolve takes an HTTPD access log (in the COMMON log file format,
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * or any other format that has the IP number/domain name as the first
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * field for that matter), and outputs the same file with all of the
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * domain names looked up. Where no domain name can be found, the IP
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * number is left in.
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * To minimize impact on your nameserver, logresolve has its very own
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * internal hash-table cache. This means that each IP number will only
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * be looked up the first time it is found in the log file.
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * The -c option causes logresolve to apply the same check as httpd
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * address, it looks up the IP addresses for the hostname and checks
ab2c1c1c83ec91415565da5a71fbc15d9685caa6fielding * that one of these matches the original address.
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding/* Statistics */
0f081398cf0eef8cc7c66a535d450110a92dc8aefieldingstatic int cachehits = 0;    /* reported as "Cache hits" in the statistics output */
0f081398cf0eef8cc7c66a535d450110a92dc8aefieldingstatic int cachesize = 0;    /* reported as "Cache size" in the statistics output */
0f081398cf0eef8cc7c66a535d450110a92dc8aefieldingstatic int entries = 0;      /* reported as "Entries"; log entries counted while reading input */
0f081398cf0eef8cc7c66a535d450110a92dc8aefieldingstatic int resolves = 0;     /* reported as "Resolves"; historically means "parsed as an IP address", not "resolved to a hostname" */
0f081398cf0eef8cc7c66a535d450110a92dc8aefieldingstatic int withname = 0;     /* reported as "With name" in the statistics output */
0f081398cf0eef8cc7c66a535d450110a92dc8aefieldingstatic int doublefailed = 0; /* NOTE(review): presumably the value printed for "- Double lookup failed" -- confirm */
0f081398cf0eef8cc7c66a535d450110a92dc8aefieldingstatic int noreverse = 0;    /* NOTE(review): presumably counts addresses with no reverse lookup result -- confirm against its uses */
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding * prints various statistics to output
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding apr_file_printf(output, "logresolve Statistics:" NL);
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding apr_file_printf(output, "Entries: %d" NL, entries);
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding apr_file_printf(output, " With name : %d" NL, withname);
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding apr_file_printf(output, " Resolves : %d" NL, resolves);
ba4c566c200c2436dae841b7c811807c80cd712afielding apr_file_printf(output, " - Double lookup failed : %d" NL,
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding apr_file_printf(output, "Cache hits : %d" NL, cachehits);
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding apr_file_printf(output, "Cache size : %d" NL, cachesize);
0d496deee49b66bb43883640fa9c1a7e884a1b8ctrawick * usage info
024cd9589e52cf11ce765dfddb5b5f0c6e421a48gsteinstatic void usage(void)
024cd9589e52cf11ce765dfddb5b5f0c6e421a48gstein "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
ea92d0ffcb30b186010a2c8ca2c80d2ac09e34dastoddard " -c Perform double lookups when resolving IP addresses." NL,
ea92d0ffcb30b186010a2c8ca2c80d2ac09e34dastoddard const char * arg;
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding while (1) {
3d96ee83babeec32482c9082c9426340cee8c44dwrowe switch (opt) {
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard } /* switch */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard } /* else */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard } /* while */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
1ccd992d37d62c8cb2056126f2234f64ec189bfddougm /* Allocate two new 10k file buffers */
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding if ((outbuffer = apr_palloc(pool, 10240)) == NULL ||
a6b9ed64fdf548c61de9714e2cfb999ec59d149cgstein /* Set the buffers */
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding while (apr_file_gets(line, 2048, infile) == APR_SUCCESS) {
a6b9ed64fdf548c61de9714e2cfb999ec59d149cgstein /* Count our log entries */
ea92d0ffcb30b186010a2c8ca2c80d2ac09e34dastoddard /* Check if this could even be an IP address */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Terminate the line at the next space */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* See if we have it in our cache */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard apr_file_printf(outfile, "%s %s", hostname, space + 1);
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Parse the IP address */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC ,0, 0, pool);
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Not an IP address */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* This does not make much sense, but historically "resolves" means
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard * "parsed as an IP address". It does not mean we actually resolved
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard * the IP address into a hostname.
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* From here on out we cache each result, even if it was not
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard * successful
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Try and perform a reverse lookup */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Could not perform a reverse lookup */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Add to cache */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Perform a double lookup */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Do a forward lookup on our hostname, and see if that matches our
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard * original IP address.
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Double-lookup failed */
0f081398cf0eef8cc7c66a535d450110a92dc8aefielding /* Add to cache */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Output the resolved name */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard apr_file_printf(outfile, "%s %s", hostname, space + 1);
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Store it in the cache */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard /* Flush any remaining output */
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
fd0075570654d8f3473f12c47f507c8b3c59a8e4stoddard apr_file_printf(errfile, "%s: Could not open %s for writing.",