logresolve.c revision 66da5b8ab8a770e8cc6c19bb4aea8c5b0028bf25
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* logresolve 2.0
*
* Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
* UUNET Canada, April 16, 1995
*
* Rewritten by David Robinson. (drtr ast.cam.ac.uk)
* Rewritten again, and ported to APR by Colm MacCarthaigh
*
* Usage: logresolve [-s filename] [-c] < access_log > new_log
*
* Arguments:
* -s filename name of a file to record statistics
* -c check the DNS for a matching A record for the host.
*
* Notes: (For historical interest)
*
* To generate meaningful statistics from an HTTPD log file, it's good
* to have the domain name of each machine that accessed your site, but
* doing this on the fly can slow HTTPD down.
*
* Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
* resolution off. Before running your stats program, just run your log
* file through this program (logresolve) and all of your IP numbers will
* be resolved into hostnames (where possible).
*
* logresolve takes an HTTPD access log (in the COMMON log file format,
* or any other format that has the IP number in the first
* field for that matter), and outputs the same file with all of the
* domain names looked up. Where no domain name can be found, the IP
* number is left in.
*
* To minimize impact on your nameserver, logresolve has its very own
* internal hash-table cache. This means that each IP number will only
* be looked up the first time it is found in the log file.
*
* The -c option causes logresolve to apply the same check as httpd
* compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
* address, it looks up the IP addresses for the hostname and checks
* that one of these matches the original address.
*/
#include "apr.h"
#include "apr_lib.h"
#include "apr_hash.h"
#include "apr_getopt.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_network_io.h"
#include <stdlib.h>
#endif
/* File handle for diagnostics.
 * NOTE(review): where it is opened and written to is not visible in this
 * view - presumably stderr; confirm. */
static apr_file_t *errfile;
/* Program name.
 * NOTE(review): presumably substituted for the %s placeholders in the
 * usage text - confirm. */
static const char *shortname = "logresolve";
/* Hash-table cache consulted for every log line before any lookup is
 * attempted, so each address is only looked up once. */
static apr_hash_t *cache;
/* Statistics */
/* Lines whose address was already present in the cache. */
static int cachehits = 0;
/* Results added to the cache, whether or not the lookup succeeded. */
static int cachesize = 0;
/* Non-empty log lines processed. */
static int entries = 0;
/* Lines whose first field parsed as an IP address (historical name: this
 * does not mean a hostname was actually found). */
static int resolves = 0;
/* Lines whose first field could not be an IP address. */
static int withname = 0;
/* Addresses whose double (forward-confirming) lookup failed. */
static int doublefailed = 0;
/* Addresses for which the reverse lookup failed. */
static int noreverse = 0;
/*
* Prints various statistics to output.
*
* NOTE(review): the function signature and the individual print calls are
* not present in this view. Only the guards on noreverse and doublefailed
* remain, with empty bodies; presumably each guarded section is emitted
* only when its counter is non-zero - confirm against the full file.
*/
/* NL is APR's end-of-line string; the usage text literals end with it. */
#define NL APR_EOL_STR
{
if (noreverse) {
}
if (doublefailed) {
}
}
/*
* Usage info.
*
* Holds the help text (a one-line summary, the synopsis and the option
* list, each line terminated with NL) and then exits with status 1, so
* this function never returns to its caller.
*
* NOTE(review): the call that prints the text, together with the
* arguments for the two %s placeholders, is missing from this view -
* presumably the program name is substituted; confirm.
*/
static void usage(void)
{
"%s -- Resolve IP-addresses to hostnames in Apache log files." NL
"Usage: %s [-s STATFILE] [-c]" NL
"Options:" NL
" -s Record statistics to STATFILE when finished." NL
" -c Perform double lookups when resolving IP addresses." NL,
exit(1);
}
/*
* Body of the program entry point: parses the -c / -s options, then walks
* the log line by line, consulting the cache before attempting lookups and
* updating the statistics counters as it goes.
*
* NOTE(review): this view is missing the function signature, the loop
* header, and most of the calls (option parsing, buffer setup, line
* reading, address parsing, lookups, cache insertion, output). The
* comments below describe only what the remaining statements show.
*/
{
apr_file_t * infile;
apr_sockaddr_t * ip;
apr_getopt_t * o;
apr_pool_t * pool;
const char * arg;
char opt;
char * space;
char * hostname;
char * inbuffer;
char * outbuffer;
#endif
/* Holds one log line at a time. */
char line[2048];
/* Set to 1 once -c has been seen. */
int doublelookups = 0;
return 1;
}
if (argc) {
}
return 1;
}
/* Option-processing loop */
while (1) {
break;
}
else if (status != APR_SUCCESS) {
usage();
}
else {
switch (opt) {
case 'c':
/* A repeated -c is rejected via usage(), which exits. */
if (doublelookups) {
usage();
}
doublelookups = 1;
break;
case 's':
/* A repeated -s is rejected via usage(), which exits.
 * NOTE(review): the declaration and assignment of stats are not
 * visible in this view. */
if (stats) {
usage();
}
break;
} /* switch */
} /* else */
} /* while */
/* Allocate two new 10k file buffers */
return 1;
}
/* Set the buffers */
#endif
/* Empty lines are skipped without being counted as entries */
if (line[0] == '\0') {
continue;
}
/* Count our log entries */
entries++;
/* Check if this could even be an IP address */
withname++;
continue;
}
/* Terminate the line at the next space */
*space = '\0';
}
/* See if we have it in our cache */
if (hostname) {
cachehits++;
continue;
}
/* Parse the IP address */
if (status != APR_SUCCESS) {
/* Not an IP address */
withname++;
continue;
}
/* This does not make much sense, but historically "resolves" means
* "parsed as an IP address". It does not mean we actually resolved
* the IP address into a hostname.
*/
resolves++;
/* From here on out we cache each result, even if it was not
* successful
*/
cachesize++;
/* Try and perform a reverse lookup */
/* Could not perform a reverse lookup */
/* Put the separator back.
 * NOTE(review): presumably so the unmodified line can be written out;
 * the output call is not visible here. */
*space = ' ';
noreverse++;
/* Add to cache */
/* Cut the line at the separator again.
 * NOTE(review): presumably so the bare address serves as the cache key;
 * the cache call is not visible here. */
*space = '\0';
continue;
}
/* Perform a double lookup */
if (doublelookups) {
/* Do a forward lookup on our hostname, and see if that matches our
* original IP address.
*/
0, pool);
if (status == APR_SUCCESS ||
/* Double-lookup failed */
/* Same restore / re-terminate sequence as in the reverse-lookup
 * failure path. */
*space = ' ';
doublefailed++;
/* Add to cache */
*space = '\0';
continue;
}
}
/* Output the resolved name */
/* Store it in the cache */
}
/* Flush any remaining output */
if (stats) {
return 1;
}
}
return 0;
}