842ae4bd224140319ae7feec1872b93dfd491143fielding/* Licensed to the Apache Software Foundation (ASF) under one or more
842ae4bd224140319ae7feec1872b93dfd491143fielding * contributor license agreements. See the NOTICE file distributed with
842ae4bd224140319ae7feec1872b93dfd491143fielding * this work for additional information regarding copyright ownership.
842ae4bd224140319ae7feec1872b93dfd491143fielding * The ASF licenses this file to You under the Apache License, Version 2.0
842ae4bd224140319ae7feec1872b93dfd491143fielding * (the "License"); you may not use this file except in compliance with
842ae4bd224140319ae7feec1872b93dfd491143fielding * the License. You may obtain a copy of the License at
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * Unless required by applicable law or agreed to in writing, software
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * distributed under the License is distributed on an "AS IS" BASIS,
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * See the License for the specific language governing permissions and
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * limitations under the License.
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm * logresolve 2.0
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * UUNET Canada, April 16, 1995
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm * Rewritten again, and ported to APR by Colm MacCarthaigh
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Usage: logresolve [-s filename] [-c] < access_log > new_log
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Arguments:
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * -s filename name of a file to record statistics
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * -c check the DNS for a matching A record for the host.
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm * Notes: (For historical interest)
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * To generate meaningful statistics from an HTTPD log file, it's good
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * to have the domain name of each machine that accessed your site, but
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * doing this on the fly can slow HTTPD down.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * resolution off. Before running your stats program, just run your log
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * file through this program (logresolve) and all of your IP numbers will
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * be resolved into hostnames (where possible).
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * logresolve takes an HTTPD access log (in the COMMON log file format,
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * or any other format that has the IP number/domain name as the first
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * field for that matter), and outputs the same file with all of the
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * domain names looked up. Where no domain name can be found, the IP
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * number is left in.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * To minimize impact on your nameserver, logresolve has its very own
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * internal hash-table cache. This means that each IP number will only
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * be looked up the first time it is found in the log file.
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * The -c option causes logresolve to apply the same check as httpd
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * address, it looks up the IP addresses for the hostname and checks
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * that one of these matches the original address.
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm/* Statistics */
09fe0b69d3d1e8c8041c9ce99ee77b8b44b5e3b1fielding * prints various statistics to output
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm apr_file_printf(output, " With name : %d" NL, withname);
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm apr_file_printf(output, " Resolves : %d" NL, resolves);
17dc8282ea6b3ad1bbc661b498de9ec2e9987edejim apr_file_printf(output, " - Double lookup failed : %d" NL,
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm apr_file_printf(output, "Cache hits : %d" NL, cachehits);
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm apr_file_printf(output, "Cache size : %d" NL, cachesize);
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm * usage info
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm " -c Perform double lookups when resolving IP addresses." NL,
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm while (1) {
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm switch (opt) {
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm } /* switch */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm } /* else */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm } /* while */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Allocate two new 10k file buffers */
42575bb15a3ed6084cd1e225aa9251829dbdd258sf if ( (outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Set the buffers */
e18cfc773d01b1c7e93fabc09f94bd068a53dabetakashi apr_file_buffer_set(infile, inbuffer, READ_BUF_SIZE);
e18cfc773d01b1c7e93fabc09f94bd068a53dabetakashi apr_file_buffer_set(outfile, outbuffer, WRITE_BUF_SIZE);
42575bb15a3ed6084cd1e225aa9251829dbdd258sf while (apr_file_gets(line, LINE_BUF_SIZE, infile) == APR_SUCCESS) {
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Count our log entries */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Check if this could even be an IP address */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Terminate the line at the next space */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* See if we have it in our cache */
2b672ae3a6d190fb62d04f4f47bbdc0a2bde151fcolm hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
02c8e5e329cb0652f9444b492df79cca886bb762jorton apr_file_printf(outfile, "%s %s", hostname, space + 1);
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Parse the IP address */
b76764362f1f69ced135b0f2e1481a26fef04347takashi status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline);
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Not an IP address */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* This does not make much sense, but historically "resolves" means
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm * "parsed as an IP address". It does not mean we actually resolved
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm * the IP address into a hostname.
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* From here on out we cache each result, even if it was not
e8f95a682820a599fe41b22977010636be5c2717jim * successful
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Try and perform a reverse lookup */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Could not perform a reverse lookup */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Add to cache */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Perform a double lookup */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Do a forward lookup on our hostname, and see if that matches our
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm * original IP address.
e8f95a682820a599fe41b22977010636be5c2717jim status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Double-lookup failed */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Add to cache */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Output the resolved name */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm apr_file_printf(outfile, "%s %s", hostname, space + 1);
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Store it in the cache */
0b4b04d8621478ba59f0a6ba2950ddc02ab92b58colm /* Flush any remaining output */
e8f95a682820a599fe41b22977010636be5c2717jim APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
e8f95a682820a599fe41b22977010636be5c2717jim apr_file_printf(errfile, "%s: Could not open %s for writing.",