650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar#!@perlbin@
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# Licensed to the Apache Software Foundation (ASF) under one or more
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# contributor license agreements. See the NOTICE file distributed with
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# this work for additional information regarding copyright ownership.
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# The ASF licenses this file to You under the Apache License, Version 2.0
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# (the "License"); you may not use this file except in compliance with
db479b48bd4d75423ed4a45e15b75089d1a8ad72fielding# the License. You may obtain a copy of the License at
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# Unless required by applicable law or agreed to in writing, software
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# distributed under the License is distributed on an "AS IS" BASIS,
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# See the License for the specific language governing permissions and
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# limitations under the License.
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# v 1.2 by robh imdb.com
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# usage: logresolve.pl <infile >outfile
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# input = Apache/NCSA/.. logfile with IP numbers at start of lines
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# output = same logfile with IP addresses resolved to hostnames where
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# name lookups succeeded.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# this differs from the C based 'logresolve' in that this script
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# spawns a number ($CHILDREN) of subprocesses to resolve addresses
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# concurrently and sets a short timeout ($TIMEOUT) for each lookup in
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# order to keep things moving quickly.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# the parent process handles caching of IP->hostnames using a Perl hash
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# it also avoids sending the same IP to multiple child processes to be
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# resolved multiple times concurrently.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# Depending on the settings of $CHILDREN and $TIMEOUT you should see
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# significant reductions in the overall time taken to resolve your
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# logfile lines processed per hour compared to ~45,000 per hour
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# with 'logresolve'.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# I haven't yet seen any noticeable reduction in the percentage of IPs
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# that fail to get resolved. Your mileage will no doubt vary. 5s is long
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# enough to wait IMO.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# Known to work with FreeBSD 2.2
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# Known to have problems with Solaris
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# 980417 - use 'sockaddr_un' for bind/connect to make the script work
ce9621257ef9e54c1bbe5ad8a5f445a1f211c2dcnd# with linux. Fix from Luuk de Boer <luuk_de_boer pi.net>
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoarrequire 5.004;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoarmy %hash = ();
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";}
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar## remove all temporary files before shutting down
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # Trap some possible signals to trigger temp file cleanup
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar ## fork child processes. Each child will create a socket connection
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar ## to this parent and use a unique temp filename to do so.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar if (!socket($CHILDSOCK{$child}, AF_UNIX, SOCK_STREAM, $PROTOCOL)) {
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar $response = connect($CHILDSOCK{$child}, sockaddr_un($filename));
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar ## All child processes should now be ready or at worst warming up
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar my (@buffer, $child, $ip, $rest, $hostname, $response);
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar $child = 1; # children are numbered 1..N, start with #1
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # while we have a child to talk to and data to give it..
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar ($ip, $rest) = split(/ /, $_, 2); # separate IP from rest
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar $CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # child sends us back both the IP and HOSTNAME, no need for us
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # to remember what child received any given IP, and no worries
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # resolve all the logfiles lines held in the log buffer array..
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # separate IP from rest and replace with cached hostname
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar########################################
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar my $me = shift;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # accept a connection from the parent process. We only ever have
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # one connection where we exchange 1 line of info with the
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # parent.. 1 line in (IP address), 1 line out (IP + hostname).
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # while the parent keeps sending data, we keep responding..
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # resolve the IP if time permits and send back what we found..
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar# perform a time restricted hostname lookup.
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar my $ip = shift;
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # do the hostname lookup inside an eval. The eval will use the
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # already configured SIGnal handler and drop out of the {} block
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar $hostname = gethostbyaddr(gethostbyname($ip), AF_INET);
650b8deadd86f9326c0d4a1bcbbb61cb907722aacoar # return the hostname or the IP address itself if there is no hostname