650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#!@perlbin@
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# Licensed to the Apache Software Foundation (ASF) under one or more
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# contributor license agreements. See the NOTICE file distributed with
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# this work for additional information regarding copyright ownership.
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# The ASF licenses this file to You under the Apache License, Version 2.0
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# (the "License"); you may not use this file except in compliance with
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# the License. You may obtain a copy of the License at
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# http://www.apache.org/licenses/LICENSE-2.0
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# Unless required by applicable law or agreed to in writing, software
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# distributed under the License is distributed on an "AS IS" BASIS,
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# See the License for the specific language governing permissions and
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# limitations under the License.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# logresolve.pl
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# v 1.2 by robh imdb.com
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# usage: logresolve.pl <infile >outfile
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# input = Apache/NCSA/.. logfile with IP numbers at start of lines
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# output = same logfile with IP addresses resolved to hostnames where
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# name lookups succeeded.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# this differs from the C based 'logresolve' in that this script
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# spawns a number ($CHILDREN) of subprocesses to resolve addresses
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# concurrently and sets a short timeout ($TIMEOUT) for each lookup in
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# order to keep things moving quickly.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# the parent process handles caching of IP->hostnames using a Perl hash
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# it also avoids sending the same IP to multiple child processes to be
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# resolved multiple times concurrently.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# Depending on the settings of $CHILDREN and $TIMEOUT you should see
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# significant reductions in the overall time taken to resolve your
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# logfile lines processed per hour compared to ~45,000 per hour
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# with 'logresolve'.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
# I haven't yet seen any noticeable reduction in the percentage of IPs
# that fail to get resolved. Your mileage will no doubt vary. 5s is long
# enough to wait IMO.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# Known to work with FreeBSD 2.2
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# Known to have problems with Solaris
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# 980417 - use 'sockaddr_un' for bind/connect to make the script work
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# with linux. Fix from Luuk de Boer <luuk_de_boer pi.net>
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoarrequire 5.004;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar$|=1;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoaruse FileHandle;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoaruse Socket;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoaruse strict;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoarno strict 'refs';
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
# Protocol argument handed to every socket() call in this script.
use vars qw($PROTOCOL);
$PROTOCOL = 0;

my $CHILDREN = 40;      # number of resolver subprocesses to fork
my $TIMEOUT  = 5;       # seconds allowed for a single hostname lookup

my $filename;           # path of the UNIX socket file being prepared
my %hash     = ();      # cache: first field of a log line -> hostname
my $parent   = $$;      # parent PID, embedded in every socket filename

# Fork the resolver children. Each child removes any stale socket file
# carrying its name, serves lookups in child() and then exits; the parent
# only records the child's PID so that cleanup() can signal it later.
my @children = ();
for (my $child = 1; $child <= $CHILDREN; $child++) {
    my $f = fork();
    if (!defined($f)) {
        # fork() failed. undef must not be mistaken for the 0 that fork()
        # returns inside a child, otherwise the parent itself would run
        # the child branch below and exit.
        my $err = $!;
        cleanup();      # stop the children that were already started
        die "fork failed for child $child: $err\n";
    }
    if ($f == 0) {
        $filename = "./.socket.$parent.$child";
        if (-e $filename) { unlink($filename) || warn "$filename .. $!\n"; }
        child($child);
        exit(0);
    }
    push(@children, $f);
}

# Only the parent gets here: resolve STDIN to STDOUT, then tidy up.
parent();
cleanup();
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
## Shutdown helper: signal every forked child and remove the socket
## files that were created for them in the current directory.
sub cleanup {
    # send signal 15 (SIGTERM) to every PID recorded in @children
    kill(15, @children);

    # children are numbered 1..$CHILDREN; each one owns one socket file
    foreach my $child (1 .. $CHILDREN) {
        my $path = "./.socket.$parent.$child";
        next unless -e $path;
        unlink($path) || warn ".socket.$parent.$child $!";
    }
}
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
## Parent side of the resolver.
##
## Connects to the UNIX socket of each child (numbered 1..$CHILDREN), then
## reads log lines from STDIN in batches. Within a batch every not yet
## cached first field is handed to its own child; once all children are
## busy (or STDIN is exhausted) the answers are collected into %hash and
## the buffered lines are written to STDOUT with the first field replaced
## by the cached hostname. Takes no arguments.
sub parent {
    # Trap signals so that the socket files are removed and the children
    # are stopped before shutting down. SIGKILL can never be caught, so
    # TERM is trapped instead. The handler has to exit: once cleanup() has
    # killed the children there is nobody left to talk to.
    my $shutdown = sub {
        # ignore further signals while tearing down (avoids re-entry)
        $SIG{'TERM'} = $SIG{'INT'} = $SIG{'PIPE'} = 'IGNORE';
        cleanup();
        exit(1);
    };
    $SIG{'TERM'} = $SIG{'INT'} = $SIG{'PIPE'} = $shutdown;

    my %CHILDSOCK;

    ## Connect to every child. Each child listens on a socket file whose
    ## name is built from the parent PID and the child number.
    for (my $child = 1; $child <= $CHILDREN; $child++) {
        my $sock = FileHandle->new;

        if (!socket($sock, AF_UNIX, SOCK_STREAM, $PROTOCOL)) {
            # without a socket the connect loop below could never succeed
            my $err = $!;
            cleanup();
            die "parent socket to child failed $err\n";
        }

        my $filename = "./.socket.$parent.$child";
        # the child may not have reached listen() yet, so keep retrying
        until (connect($sock, sockaddr_un($filename))) {
            sleep(1);
        }
        $sock->autoflush;
        $CHILDSOCK{$child} = $sock;
    }
    ## All child processes should now be ready or at worst warming up

    my (@buffer, $child, $line, $ip, $rest, $hostname, $response);
    ## read the logfile lines from STDIN
    while (defined($line = <STDIN>)) {
        @buffer = ();   # empty the logfile line buffer array.
        $child  = 1;    # children are numbered 1..N, start with #1

        # while we have a child to talk to and data to give it..
        do {
            push(@buffer, $line);                   # buffer the line
            ($ip, $rest) = split(/ /, $line, 2);    # separate IP from rest

            # A line without a space (or with an empty first field) has
            # nothing to resolve; sending it on would put an embedded
            # newline on the wire and break the one-line-in/one-line-out
            # exchange with the child.
            if (defined($rest) && $ip ne '' && !exists($hash{$ip})) {
                $CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
                $hash{$ip} = $ip;                   # don't look it up again.
                $child++;
            }
            # $child is the NEXT child to use, so every child 1..$CHILDREN
            # gets work before the batch is closed.
        } while (($child <= $CHILDREN) and defined($line = <STDIN>));

        ## now poll each child that was given work for its response
        while (--$child > 0) {
            $response = $CHILDSOCK{$child}->getline;
            # no answer (the child went away): keep the IP cached as-is
            next unless defined($response);
            chomp($response);
            # child sends us back both the IP and HOSTNAME, no need for us
            # to remember what child received any given IP, and no worries
            # what order we talk to the children
            ($ip, $hostname) = split(/\|/, $response, 2);
            if (defined($hostname) && $hostname ne '') {
                $hash{$ip} = $hostname;
            }
        }

        # resolve all the logfile lines held in the log buffer array..
        foreach my $entry (@buffer) {
            # separate IP from rest and replace with cached hostname
            ($ip, $rest) = split(/ /, $entry, 2);
            if (defined($rest) && $ip ne '') {
                printf STDOUT ("%s %s", $hash{$ip}, $rest);
            }
            else {
                # nothing was resolved for this line, pass it through
                print STDOUT $entry;
            }
        }
    }
}
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar########################################
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
## Child side of the resolver.
##
## arg = numeric ID - how the parent refers to me. The ID selects the
## socket file this child listens on. The child accepts one connection
## and then answers every line it receives (an IP address) with one line
## of the form "IP|HOSTNAME" until the parent closes the connection.
## Dies if the socket cannot be created, bound, listened on or accepted.
sub child {
    my $me = shift;

    # add trap for alarm signals; nslookup() uses alarm() as its timeout.
    $SIG{'ALRM'} = sub { die "alarmed"; };

    # create a socket to communicate with parent
    my $listener = FileHandle->new;
    socket($listener, AF_UNIX, SOCK_STREAM, $PROTOCOL)
        || die "Error with Socket: $!\n";
    my $sockpath = "./.socket.$parent.$me";
    bind($listener, sockaddr_un($sockpath))
        || die "Error Binding $sockpath: $!\n";
    listen($listener, 5) || die "Error Listening: $!\n";

    my ($ip, $send_back);
    my $talk = FileHandle->new;

    # accept a connection from the parent process. We only ever have
    # one connection where we exchange 1 line of info with the
    # parent.. 1 line in (IP address), 1 line out (IP + hostname).
    accept($talk, $listener) || die "Error Accepting: $!\n";
    # disable I/O buffering just in case
    $talk->autoflush;

    # while the parent keeps sending data, we keep responding..
    while (defined($ip = $talk->getline)) {
        chomp($ip);
        # resolve the IP if time permits and send back what we found..
        $send_back = sprintf("%s|%s", $ip, nslookup($ip));
        $talk->print($send_back . "\n");
    }
}
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
# perform a time restricted hostname lookup.
#
# arg     = the address to look up, as text
# returns = the hostname found for it, or the argument itself when the
#           lookup fails or takes longer than $TIMEOUT seconds
sub nslookup {
    # get the IP as an arg
    my $ip = shift;
    my $hostname = undef;

    # do the hostname lookup inside an eval. When the alarm fires, the
    # handler dies and control drops out of the {} block; any error is
    # deliberately swallowed because the fallback below covers it.
    eval {
        # install the handler here as well so the timeout does not depend
        # on the caller having set one up
        local $SIG{'ALRM'} = sub { die "alarmed"; };
        alarm($TIMEOUT);
        # scalar context: gethostbyname yields the packed address
        my $packed = gethostbyname($ip);
        if (defined($packed)) {
            $hostname = gethostbyaddr($packed, AF_INET);
        }
        alarm(0);
    };
    # make sure no alarm stays pending if the block was left early
    alarm(0);

    # return the hostname or the IP address itself if there is no hostname
    if (defined($hostname) && $hostname ne "") {
        return $hostname;
    }
    return $ip;
}
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar