logresolve.c revision cf79f5a1c696efe7c0b959a3c375f669ac496065
252N/A/* Licensed to the Apache Software Foundation (ASF) under one or more
252N/A * contributor license agreements. See the NOTICE file distributed with
252N/A * this work for additional information regarding copyright ownership.
252N/A * The ASF licenses this file to You under the Apache License, Version 2.0
252N/A * (the "License"); you may not use this file except in compliance with
252N/A * the License. You may obtain a copy of the License at
252N/A *
252N/A * http://www.apache.org/licenses/LICENSE-2.0
252N/A *
252N/A * Unless required by applicable law or agreed to in writing, software
252N/A * distributed under the License is distributed on an "AS IS" BASIS,
252N/A * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
252N/A * See the License for the specific language governing permissions and
252N/A * limitations under the License.
252N/A */
252N/A
252N/A/*
252N/A * logresolve 2.0
252N/A *
252N/A * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
252N/A * UUNET Canada, April 16, 1995
252N/A *
252N/A * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
252N/A * Rewritten again, and ported to APR by Colm MacCarthaigh
252N/A *
252N/A * Usage: logresolve [-s filename] [-c] < access_log > new_log
252N/A *
252N/A * Arguments:
252N/A * -s filename name of a file to record statistics
252N/A * -c check the DNS for a matching A record for the host.
252N/A *
252N/A * Notes: (For historical interest)
252N/A *
252N/A * To generate meaningful statistics from an HTTPD log file, it's good
252N/A * to have the domain name of each machine that accessed your site, but
252N/A * doing this on the fly can slow HTTPD down.
252N/A *
252N/A * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
252N/A * resolution off. Before running your stats program, just run your log
252N/A * file through this program (logresolve) and all of your IP numbers will
252N/A * be resolved into hostnames (where possible).
252N/A *
252N/A * logresolve takes an HTTPD access log (in the COMMON log file format,
252N/A * or any other format that has the IP number/domain name as the first
252N/A * field for that matter), and outputs the same file with all of the
252N/A * domain names looked up. Where no domain name can be found, the IP
252N/A * number is left in.
252N/A *
252N/A * To minimize impact on your nameserver, logresolve has its very own
252N/A * internal hash-table cache. This means that each IP number will only
252N/A * be looked up the first time it is found in the log file.
252N/A *
252N/A * The -c option causes logresolve to apply the same check as httpd
252N/A * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
252N/A * address, it looks up the IP addresses for the hostname and checks
252N/A * that one of these matches the original address.
252N/A */
252N/A
252N/A#include "apr.h"
252N/A#include "apr_lib.h"
252N/A#include "apr_hash.h"
252N/A#include "apr_getopt.h"
252N/A#include "apr_strings.h"
252N/A#include "apr_file_io.h"
252N/A#include "apr_network_io.h"
252N/A
252N/A#if APR_HAVE_STDLIB_H
252N/A#include <stdlib.h>
252N/A#endif
252N/A
252N/Astatic apr_file_t *errfile;
252N/Astatic const char *shortname = "logresolve";
252N/Astatic apr_hash_t *cache;
252N/A
252N/A/* Statistics */
252N/Astatic int cachehits = 0;
252N/Astatic int cachesize = 0;
252N/Astatic int entries = 0;
252N/Astatic int resolves = 0;
252N/Astatic int withname = 0;
252N/Astatic int doublefailed = 0;
252N/Astatic int noreverse = 0;
252N/A
252N/A/*
252N/A * prints various statistics to output
252N/A */
252N/A#define NL APR_EOL_STR
252N/Astatic void print_statistics (apr_file_t *output)
252N/A{
252N/A apr_file_printf(output, "logresolve Statistics:" NL);
252N/A apr_file_printf(output, "Entries: %d" NL, entries);
252N/A apr_file_printf(output, " With name : %d" NL, withname);
252N/A apr_file_printf(output, " Resolves : %d" NL, resolves);
252N/A
252N/A if (noreverse) {
252N/A apr_file_printf(output, " - No reverse : %d" NL,
252N/A noreverse);
252N/A }
252N/A
252N/A if (doublefailed) {
252N/A apr_file_printf(output, " - Double lookup failed : %d" NL,
252N/A doublefailed);
252N/A }
252N/A
252N/A apr_file_printf(output, "Cache hits : %d" NL, cachehits);
252N/A apr_file_printf(output, "Cache size : %d" NL, cachesize);
252N/A}
252N/A
252N/A/*
252N/A * usage info
252N/A */
252N/Astatic void usage(void)
252N/A{
252N/A apr_file_printf(errfile,
252N/A "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
252N/A "Usage: %s [-s STATFILE] [-c]" NL
252N/A NL
252N/A "Options:" NL
252N/A " -s Record statistics to STATFILE when finished." NL
252N/A NL
252N/A " -c Perform double lookups when resolving IP addresses." NL,
252N/A shortname, shortname);
252N/A exit(1);
252N/A}
252N/A#undef NL
252N/A
252N/Aint main(int argc, const char * const argv[])
252N/A{
252N/A apr_file_t * outfile;
252N/A apr_file_t * infile;
252N/A apr_file_t * statsfile;
252N/A apr_sockaddr_t * ip;
252N/A apr_sockaddr_t * ipdouble;
252N/A apr_getopt_t * o;
252N/A apr_pool_t * pool;
252N/A apr_status_t status;
252N/A const char * arg;
252N/A char opt;
252N/A char * stats = NULL;
252N/A char * space;
252N/A char * hostname;
252N/A#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
252N/A char * inbuffer;
252N/A char * outbuffer;
252N/A#endif
252N/A char line[2048];
252N/A int doublelookups = 0;
252N/A
252N/A if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
252N/A return 1;
252N/A }
252N/A atexit(apr_terminate);
252N/A
252N/A if (argc) {
252N/A shortname = apr_filepath_name_get(argv[0]);
252N/A }
252N/A
252N/A if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
252N/A return 1;
252N/A }
252N/A apr_file_open_stderr(&errfile, pool);
252N/A apr_getopt_init(&o, pool, argc, argv);
252N/A
252N/A while (1) {
252N/A status = apr_getopt(o, "s:c", &opt, &arg);
252N/A if (status == APR_EOF) {
252N/A break;
252N/A }
252N/A else if (status != APR_SUCCESS) {
252N/A usage();
252N/A }
252N/A else {
252N/A switch (opt) {
252N/A case 'c':
252N/A if (doublelookups) {
252N/A usage();
252N/A }
252N/A doublelookups = 1;
252N/A break;
252N/A case 's':
252N/A if (stats) {
252N/A usage();
252N/A }
252N/A stats = apr_pstrdup(pool, arg);
252N/A break;
252N/A } /* switch */
252N/A } /* else */
252N/A } /* while */
252N/A
252N/A apr_file_open_stdout(&outfile, pool);
252N/A apr_file_open_stdin(&infile, pool);
252N/A
252N/A#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
252N/A /* Allocate two new 10k file buffers */
252N/A if ((outbuffer = apr_palloc(pool, 10240)) == NULL ||
252N/A (inbuffer = apr_palloc(pool, 10240)) == NULL) {
252N/A return 1;
252N/A }
252N/A
252N/A /* Set the buffers */
252N/A apr_file_buffer_set(infile, inbuffer, 10240);
252N/A apr_file_buffer_set(outfile, outbuffer, 10240);
252N/A#endif
252N/A
252N/A cache = apr_hash_make(pool);
252N/A
252N/A while (apr_file_gets(line, 2048, infile) == APR_SUCCESS) {
252N/A if (line[0] == '\0') {
252N/A continue;
252N/A }
252N/A
252N/A /* Count our log entries */
252N/A entries++;
252N/A
252N/A /* Check if this could even be an IP address */
252N/A if (!apr_isxdigit(line[0]) && line[0] != ':') {
252N/A withname++;
252N/A apr_file_puts(line, outfile);
252N/A continue;
252N/A }
252N/A
252N/A /* Terminate the line at the next space */
252N/A if ((space = strchr(line, ' ')) != NULL) {
252N/A *space = '\0';
252N/A }
252N/A
252N/A /* See if we have it in our cache */
252N/A hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
252N/A if (hostname) {
252N/A apr_file_printf(outfile, "%s", hostname);
252N/A if (space)
252N/A apr_file_printf(outfile, " %s", space + 1);
252N/A cachehits++;
252N/A continue;
252N/A }
252N/A
252N/A /* Parse the IP address */
252N/A status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC ,0, 0, pool);
252N/A if (status != APR_SUCCESS) {
252N/A /* Not an IP address */
252N/A withname++;
252N/A if (space) *space = ' ';
252N/A apr_file_puts(line, outfile);
252N/A continue;
252N/A }
252N/A
252N/A /* This does not make much sense, but historically "resolves" means
252N/A * "parsed as an IP address". It does not mean we actually resolved
252N/A * the IP address into a hostname.
252N/A */
252N/A resolves++;
252N/A
252N/A /* From here on our we cache each result, even if it was not
252N/A * succesful
252N/A */
252N/A cachesize++;
252N/A
252N/A /* Try and perform a reverse lookup */
252N/A status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
252N/A if (status || hostname == NULL) {
252N/A /* Could not perform a reverse lookup */
252N/A *space = ' ';
252N/A apr_file_puts(line, outfile);
252N/A noreverse++;
252N/A
252N/A /* Add to cache */
252N/A *space = '\0';
252N/A apr_hash_set(cache, line, APR_HASH_KEY_STRING,
252N/A apr_pstrdup(pool, line));
252N/A continue;
252N/A }
252N/A
252N/A /* Perform a double lookup */
252N/A if (doublelookups) {
252N/A /* Do a forward lookup on our hostname, and see if that matches our
252N/A * original IP address.
252N/A */
252N/A status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
252N/A 0, pool);
252N/A if (status == APR_SUCCESS ||
252N/A memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
252N/A /* Double-lookup failed */
252N/A *space = ' ';
252N/A apr_file_puts(line, outfile);
252N/A doublefailed++;
252N/A
252N/A /* Add to cache */
252N/A *space = '\0';
252N/A apr_hash_set(cache, line, APR_HASH_KEY_STRING,
252N/A apr_pstrdup(pool, line));
252N/A continue;
252N/A }
252N/A }
252N/A
252N/A /* Outout the resolved name */
252N/A apr_file_printf(outfile, "%s %s", hostname, space + 1);
252N/A
252N/A /* Store it in the cache */
252N/A apr_hash_set(cache, line, APR_HASH_KEY_STRING,
252N/A apr_pstrdup(pool, hostname));
252N/A }
252N/A
252N/A /* Flush any remaining output */
252N/A apr_file_flush(outfile);
252N/A
252N/A if (stats) {
252N/A if (apr_file_open(&statsfile, stats,
252N/A APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
252N/A APR_OS_DEFAULT, pool) != APR_SUCCESS) {
252N/A apr_file_printf(errfile, "%s: Could not open %s for writing.",
252N/A shortname, stats);
252N/A return 1;
252N/A }
252N/A print_statistics(statsfile);
252N/A apr_file_close(statsfile);
252N/A }
252N/A
252N/A return 0;
252N/A}
252N/A