logresolve.c revision b76764362f1f69ced135b0f2e1481a26fef04347
2a0b626c070fea0a68a071a4160ff695eb4731dand/* Licensed to the Apache Software Foundation (ASF) under one or more
2a0b626c070fea0a68a071a4160ff695eb4731dand * contributor license agreements. See the NOTICE file distributed with
2a0b626c070fea0a68a071a4160ff695eb4731dand * this work for additional information regarding copyright ownership.
2a0b626c070fea0a68a071a4160ff695eb4731dand * The ASF licenses this file to You under the Apache License, Version 2.0
2a0b626c070fea0a68a071a4160ff695eb4731dand * (the "License"); you may not use this file except in compliance with
2a0b626c070fea0a68a071a4160ff695eb4731dand * the License. You may obtain a copy of the License at
2a0b626c070fea0a68a071a4160ff695eb4731dand * Unless required by applicable law or agreed to in writing, software
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * distributed under the License is distributed on an "AS IS" BASIS,
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * See the License for the specific language governing permissions and
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen * limitations under the License.
3f08db06526d6901aa08c110b5bc7dde6bc39905nd * logresolve 2.0
2a0b626c070fea0a68a071a4160ff695eb4731dand * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
2a0b626c070fea0a68a071a4160ff695eb4731dand * UUNET Canada, April 16, 1995
2a0b626c070fea0a68a071a4160ff695eb4731dand * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
2a0b626c070fea0a68a071a4160ff695eb4731dand * Rewritten again, and ported to APR by Colm MacCarthaigh
27dcd8d81085fd60aadcd8a9bad35a607b26b758nilgun * Usage: logresolve [-s filename] [-c] < access_log > new_log
e609c337f729875bc20e01096c7e610f45356f54nilgun * Arguments:
f086b4b402fa9a2fefc7dda85de2a3cc1cd0a654rjung * -s filename name of a file to record statistics
2a0b626c070fea0a68a071a4160ff695eb4731dand * -c check the DNS for a matching A record for the host.
4b575a6b6704b516f22d65a3ad35696d7b9ba372rpluem * Notes: (For historical interest)
4b575a6b6704b516f22d65a3ad35696d7b9ba372rpluem * To generate meaningful statistics from an HTTPD log file, it's good
2a0b626c070fea0a68a071a4160ff695eb4731dand * to have the domain name of each machine that accessed your site, but
2a0b626c070fea0a68a071a4160ff695eb4731dand * doing this on the fly can slow HTTPD down.
2a0b626c070fea0a68a071a4160ff695eb4731dand * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
2a0b626c070fea0a68a071a4160ff695eb4731dand * resolution off. Before running your stats program, just run your log
2a0b626c070fea0a68a071a4160ff695eb4731dand * file through this program (logresolve) and all of your IP numbers will
2a0b626c070fea0a68a071a4160ff695eb4731dand * be resolved into hostnames (where possible).
2a0b626c070fea0a68a071a4160ff695eb4731dand * logresolve takes an HTTPD access log (in the COMMON log file format,
2a0b626c070fea0a68a071a4160ff695eb4731dand * or any other format that has the IP number/domain name as the first
2a0b626c070fea0a68a071a4160ff695eb4731dand * field for that matter), and outputs the same file with all of the
2a0b626c070fea0a68a071a4160ff695eb4731dand * domain names looked up. Where no domain name can be found, the IP
2a0b626c070fea0a68a071a4160ff695eb4731dand * number is left in.
2a0b626c070fea0a68a071a4160ff695eb4731dand * To minimize impact on your nameserver, logresolve has its very own
2a0b626c070fea0a68a071a4160ff695eb4731dand * internal hash-table cache. This means that each IP number will only
30471a4650391f57975f60bbb6e4a90be7b284bfhumbedooh * be looked up the first time it is found in the log file.
2a0b626c070fea0a68a071a4160ff695eb4731dand * The -c option causes logresolve to apply the same check as httpd
2a0b626c070fea0a68a071a4160ff695eb4731dand * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
2a0b626c070fea0a68a071a4160ff695eb4731dand * address, it looks up the IP addresses for the hostname and checks
2a0b626c070fea0a68a071a4160ff695eb4731dand * that one of these matches the original address.
2a0b626c070fea0a68a071a4160ff695eb4731dand/* Statistics */
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic int cachehits = 0;
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic int cachesize = 0;
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic int entries = 0;
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic int resolves = 0;
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic int withname = 0;
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic int doublefailed = 0;
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic int noreverse = 0;
2a0b626c070fea0a68a071a4160ff695eb4731dand * prints various statistics to output
2a0b626c070fea0a68a071a4160ff695eb4731dand apr_file_printf(output, " - Double lookup failed : %d" NL,
2a0b626c070fea0a68a071a4160ff695eb4731dand apr_file_printf(output, "Cache hits : %d" NL, cachehits);
2a0b626c070fea0a68a071a4160ff695eb4731dand apr_file_printf(output, "Cache size : %d" NL, cachesize);
2a0b626c070fea0a68a071a4160ff695eb4731dand * usage info
2a0b626c070fea0a68a071a4160ff695eb4731dandstatic void usage(void)
2a0b626c070fea0a68a071a4160ff695eb4731dand "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
2a0b626c070fea0a68a071a4160ff695eb4731dand " -c Perform double lookups when resolving IP addresses." NL,
2a0b626c070fea0a68a071a4160ff695eb4731dand const char * arg;
2a0b626c070fea0a68a071a4160ff695eb4731dand#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
2a0b626c070fea0a68a071a4160ff695eb4731dand if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
2a0b626c070fea0a68a071a4160ff695eb4731dand while (1) {
2a0b626c070fea0a68a071a4160ff695eb4731dand switch (opt) {
2a0b626c070fea0a68a071a4160ff695eb4731dand } /* switch */
2a0b626c070fea0a68a071a4160ff695eb4731dand } /* else */
2a0b626c070fea0a68a071a4160ff695eb4731dand } /* while */
2a0b626c070fea0a68a071a4160ff695eb4731dand#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
2a0b626c070fea0a68a071a4160ff695eb4731dand /* Allocate two new 10k file buffers */
2a0b626c070fea0a68a071a4160ff695eb4731dand if ((outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL ||
2a0b626c070fea0a68a071a4160ff695eb4731dand /* Set the buffers */
2a0b626c070fea0a68a071a4160ff695eb4731dand while (apr_file_gets(line, sizeof(line), infile) == APR_SUCCESS) {
2a0b626c070fea0a68a071a4160ff695eb4731dand /* Count our log entries */
2a0b626c070fea0a68a071a4160ff695eb4731dand /* Check if this could even be an IP address */
0d0ba3a410038e179b695446bb149cce6264e0abnd /* Terminate the line at the next space */
0d0ba3a410038e179b695446bb149cce6264e0abnd /* See if we have it in our cache */
0d0ba3a410038e179b695446bb149cce6264e0abnd hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
0d0ba3a410038e179b695446bb149cce6264e0abnd /* Parse the IP address */
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline);
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd /* Not an IP address */
resolves++;
cachesize++;
noreverse++;
if (doublelookups) {
0, pline);
doublefailed++;
if (stats) {