logresolve.pl.in revision ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dc
#!@perlbin@
#
# Copyright 2000-2004 Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
#
# v 1.2 by robh imdb.com
#
# usage: logresolve.pl <infile >outfile
#
# output = same logfile with IP addresses resolved to hostnames where
# name lookups succeeded.
#
# this differs from the C based 'logresolve' in that this script
# spawns a number ($CHILDREN) of subprocesses to resolve addresses
# concurrently and sets a short timeout ($TIMEOUT) for each lookup in
# order to keep things moving quickly.
#
# the parent process caches IP->hostname mappings in a Perl hash; it
# also avoids sending the same IP to more than one child process, so
# no address is resolved multiple times concurrently.
#
# Depending on the settings of $CHILDREN and $TIMEOUT you should see
# significant reductions in the overall time taken to resolve your
# logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
# logfile lines processed per hour compared to ~45,000 per hour
# with 'logresolve'.
#
# I haven't yet seen any noticeable reduction in the percentage of IPs
# that fail to get resolved. Your mileage will no doubt vary. 5s is long
# enough to wait IMO.
#
# Known to work with FreeBSD 2.2
# Known to have problems with Solaris
#
# Linux-related fix from Luuk de Boer <luuk_de_boer pi.net>
# ---- script setup and entry point ------------------------------------
# Refuse to run on a perl older than 5.004.
require 5.004;
# A non-zero $| makes the currently selected output handle flush after
# every write.
$|=1;
use FileHandle;
use Socket;
use strict;
# NOTE(review): $PROTOCOL is assigned here but no my/our/"use vars"
# declaration for it is visible in this view; "use strict" requires one.
# Confirm the declaration has not been lost.
$PROTOCOL = 0;
# number of resolver subprocesses (see the header comment)
my $CHILDREN = 40;
# per-lookup timeout; the header comment describes the value 5 as "5s"
my $TIMEOUT = 5;
my $filename;
# presumably the IP->hostname cache mentioned in the header -- TODO confirm
my %hash = ();
# process ID of this process at startup
my $parent = $$;
my @children = ();
# NOTE(review): $f is not declared anywhere in this view, and the second
# closing brace below has no visible opening statement. The enclosing
# construct appears to be missing -- verify against the full script.
if (!$f) {
exit(0);
}
}
# Run the main routine, then the cleanup routine. The "&name;" call form
# passes the caller's current @_ through to the sub.
&parent;
&cleanup;
## remove all temporary files before shutting down
##
## NOTE(review): no statements are visible in this sub, and it is
## followed by two closing braces with no visible opening statement.
## The body appears to be missing here -- verify against the full script
## before relying on the description above.
sub cleanup {
# die kiddies, die
}
}
}
## Main routine of the parent process, invoked once from the top level.
## Per the comments below it is meant to start the children, feed them
## addresses read from STDIN, collect their answers, and rewrite the
## buffered log lines.
##
## NOTE(review): only fragments of this sub are visible. Loop headers,
## loop conditions and most statements are absent and the braces do not
## balance, so the comments describe intent that cannot be checked
## against code in this view. $child is also used without a visible
## declaration.
sub parent {
# Trap some possible signals to trigger temp file cleanup
# holds one handle object per child, keyed by $child (see the getline
# call below)
my %CHILDSOCK;
# declared again here, so it shadows the file-level $filename
my $filename;
## fork child processes. Each child will create a socket connection
## to this parent and use a unique temp filename to do so.
}
# most recent line read back from a child
my $response;
do {
}
}
## All child processes should now be ready or at worst warming up
## read the logfile lines from STDIN
# while we have a child to talk to and data to give it..
do {
$child++;
}
## now poll each child for a response
# read one line from the handle stored for this child
$response = $CHILDSOCK{$child}->getline;
# child sends us back both the IP and HOSTNAME, no need for us
# to remember what child received any given IP, and no worries
# what order we talk to the children
}
# resolve all the logfile lines held in the log buffer array..
# get next buffered line
# separate IP from rest and replace with cached hostname
}
}
}
########################################
# Body of a resolver subprocess.
#
# NOTE(review): only the argument fetch and the FileHandle construction
# are visible. The socket setup, accept and read/respond loop described
# by the comments below are absent from this view, and the final closing
# brace has no visible opening statement -- verify against the full
# script.
sub child {
# arg = numeric ID - how the parent refers to me
my $me = shift;
# add trap for alarm signals.
# create a socket to communicate with parent
# new, not yet opened FileHandle object
my $talk = FileHandle->new;
# accept a connection from the parent process. We only ever have
# one connection where we exchange 1 line of info with the
# parent.. 1 line in (IP address), 1 line out (IP + hostname).
# disable I/O buffering just in case
# while the parent keeps sending data, we keep responding..
# resolve the IP if time permits and send back what we found..
}
}
# perform a time restricted hostname lookup.
#
# NOTE(review): in this view the eval block is empty, no return
# statement is visible, and there is a closing brace with no visible
# opening statement. The lookup, alarm handling and return described by
# the comments below cannot be checked here -- verify against the full
# script.
sub nslookup {
# get the IP as an arg
my $ip = shift;
# do the hostname lookup inside an eval. The eval will use the
# already configured SIGnal handler and drop out of the {} block
# regardless of whether the alarm occurred or not.
eval {
};
# useful for debugging perhaps..
# print "alarming, isn't it? ($ip)";
}
# return the hostname or the IP address itself if there is no hostname
}