lari.pl revision 7010c12ad3ac2cada55cf126121a8c46957d3632
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
#
# Link Analysis of Runtime Interfaces.
#
# Define all global variables (required for strict)
use strict;
# File-scope patterns and constants shared by the routines below.
#
# Pattern match to skip objects.
# NOTE(review): the pattern body appears empty in this copy; an empty qr{}x
# matches any string, so confirm the intended runtime linker paths.
$Rtld = qr{
}x;
# Pattern matching required to determine a global symbol.
# NOTE(review): the trailing "|" leaves an empty alternative, so an empty
# string also matches. The variable name suggests a second binding type
# (WEAK) is expected here - confirm.
$GlobWeak = qr{ ^(?:
GLOB |
)$
}x;
# Pattern matching to determine link-editor specific symbols and those common
# to the compilation environment (ie. provided by all crt's).
$MultSyms = qr{ ^(?:
_DYNAMIC |
_etext |
_edata |
_end |
_init |
_fini |
_lib_version | # Defined in values
__xpg4 | # Defined in values
__xpg6 # Defined in values
)$
}x;
$CrtSyms = qr{ ^(?:
___Argv | # Defined in crt
__environ_lock | # Defined in crt
_environ | # Defined in crt
environ # Defined in crt
)$
}x;
# Pattern match to remove undefined, NOTY and versioning symbols.
# NOTE(review): as written $UndefSym only matches an empty string, and
# $IgnSyms ends in an empty alternative - both look incomplete; confirm the
# intended section index names.
$UndefSym = qr{ ^(?:
)$
}x;
$IgnSyms = qr{ ^(?:
NOTY |
)$
}x;
# Symbol flags.
# Offsets into $Symbols{$SymName}{$Obj} array.
$ObjRef = 0;
$ObjFlag = 1;
$ObjSize = 2;
# Establish locale
textdomain("SUNW_OST_SGS");
# Establish a program name for any error diagnostics.
# Report an option that is being ignored because of the way it was combined
# with another option.
#
#	$Opt1	the option that is being ignored
#	$Opt2	the option that $Opt1 conflicts with, or depends upon
#	$Flag	true when $Opt1 was used together with $Opt2, false when
#		$Opt1 was used without $Opt2
#
# The diagnostic is written to STDERR. Each format string consumes four
# values: the program name, the two options, and the ignored option again.
sub inappropriate {
	my ($Opt1, $Opt2, $Flag) = @_;

	if ($Flag) {
		printf STDERR
		    gettext("%s: inappropriate use of %s with %s: %s ignored\n"),
		    $Prog, $Opt1, $Opt2, $Opt1;
	} else {
		printf STDERR
		    gettext("%s: inappropriate use of %s without %s: %s ignored\n"),
		    $Prog, $Opt1, $Opt2, $Opt1;
	}
}
# Cleanup any temporary files on interruption
#
# Receives the signal name in $Sig. When a debug seed name has been
# established ($DbgSeed is non-empty) the file named by $File is removed,
# and the process then exits with status 1.
# NOTE(review): the braces do not balance in this copy; the statement that
# assigns $File (presumably a loop over the debug files) is not visible.
sub Cleanup {
my ($Sig) = @_;
if ($DbgSeed ne "") {
unlink($File);
}
}
}
exit 1;
}
# Main line: validate the command line, reconcile conflicting options,
# establish defaults, and then process each remaining argument as either a
# file or a directory. The exit status is $Error, which is set to 1 whenever
# an argument cannot be processed.
# NOTE(review): several statements in this section appear truncated in this
# copy (option parsing, the diagnostics' leading lines, and the argument
# loop header are not visible).
#
# Check that we have arguments.
printf STDERR
printf STDERR
print STDERR
gettext("\t[-a] print diagnostics for all symbols\n");
print STDERR
gettext("\t[-b] print diagnostics for multiple-bound " .
"symbols\n");
print STDERR
gettext("\t[-C] print demangled symbol names also\n");
print STDERR
gettext("\t[-D] read debugging information from \"file\"\n");
print STDERR
gettext("\t[-d dir] create mapfiles in \"mapdir\"\n");
print STDERR
gettext("\t[-i] print interesting information (default)\n");
print STDERR
gettext("\t[-m] create mapfiles for interface requirements\n");
print STDERR
gettext("\t[-o] print overhead information\n");
print STDERR
gettext("\t[-s] save bindings information created by ldd(1)\n");
print STDERR
gettext("\t[-v] ignore versioned objects\n");
exit 1;
} else {
# Catch any incompatible argument usage.
# With -m, both -a and -i are cleared; without -m, -d is cleared.
if ($opt{m}) {
if ($opt{a}) {
$opt{a} = 0;
}
if ($opt{i}) {
$opt{i} = 0;
}
} else {
if ($opt{d}) {
$opt{d} = 0;
}
}
# With -a, both -o and -i are cleared.
if ($opt{a}) {
if ($opt{o}) {
$opt{o} = 0;
}
if ($opt{i}) {
$opt{i} = 0;
}
}
# With -o, both -i and -b are cleared.
if ($opt{o}) {
if ($opt{i}) {
$opt{i} = 0;
}
if ($opt{b}) {
$opt{b} = 0;
}
}
# If -m is used, only one input file is applicable.
if ($opt{m} && ($#ARGV != 0)) {
"with the -m option\n"), $Prog;
exit 1;
}
# Ensure any specified directory exists, or apply a default.
if ($opt{d}) {
# User specified directory - make sure it exists.
if (! -d $opt{d}) {
exit 1;
}
} else {
$DestDir = ".";
}
# Establish a temporary directory if necessary.
if (!$opt{D}) {
$TmpDir = "/tmp";
}
}
# Establish any initial ldd(1) argument requirements.
} else {
$LddArgs = '-r -e LD_DEBUG=bindings,files,detail';
}
# If we've been asked to demangle symbols, make sure we can find the
# demangler.
if ($opt{C}) {
if (!$DemName) {
"-C ignored\n"), $Prog;
$opt{C} = 0;
}
}
# If -a or -o hasn't been specified, default to -i.
$opt{i} = 1;
}
# Determine whether we have multiple input files.
if ($#ARGV == 0) {
$Mult = 0;
} else {
$Mult = 1;
}
# Determine what platform we're running on - some inappropriate
# platform specific dependencies are better skipped.
# Establish signal handlers
$DbgSeed = "";
# For each argument determine if we're dealing with a file or directory.
$Error = 0;
if (!stat($Arg)) {
$Error = 1;
next;
}
# Process simple files. The "_" filehandle reuses the result of the
# stat() above rather than stat'ing the argument again.
if (-f _) {
if (!-r _) {
$Error = 1;
next;
}
if (!$opt{D}) {
$Error = 1;
}
} else {
# If the -D option is specified, read the
# bindings debugging information from the
# specified file.
if ($Mult) {
print STDOUT "$Arg:\n";
}
}
next;
}
# Process directories.
if (-d _) {
next;
}
$Error = 1;
}
exit $Error;
}
# Process the entries of a directory.
#
#	$Dir	path of the directory to scan
#
# Each entry is examined with lstat(); symbolic links are ignored, and
# directories and readable regular files are handled in turn.
# NOTE(review): the opendir/readdir loop header and the calls made for
# directories and files are not visible in this copy.
sub ProcDir {
my ($Dir) = @_;
my ($File);
# Open the directory and read each entry, omit "." and "..". Sorting
# the directory listing makes analyzing different source hierarchies
# easier.
next;
}
# If we're descending into a platform directory, ignore
# any inappropriate platform specific files. These
# files can have dependencies that in turn bring in the
# appropriate platform specific file, resulting in more
# than one dependency offering the same interfaces. In
# practice, the non-appropriate platform specific file
# wouldn't be loaded with a process.
next;
}
$File = "$Dir/$Entry";
if (!lstat($File)) {
next;
}
# Ignore symlinks. The "_" filehandle reuses the lstat() result above.
if (-l _) {
next;
}
# Descend into, and process any directories.
if (-d _) {
next;
}
# Process any standard files.
if (-f _ && -r _) {
next;
}
}
closedir(DIR);
}
}
# Process a file. If the file was explicitly defined on the command-line, and
# an error occurs, tell the user. Otherwise, this file probably came about from
# scanning a directory, in which case just skip it and move on.
#
# Returns 1 when the file is deliberately ignored or has been processed, and
# 0 when its bindings information could not be obtained.
# NOTE(review): the argument unpacking is not visible in this copy; $File and
# $CmdLine are presumably the sub's parameters - confirm.
sub ProcFile {
# If we're scanning a directory (ie. /lib) and have picked up ld.so.1,
# ignore it.
return 1;
}
if ($CmdLine) {
}
return 0;
}
# Create a temporary filename for capturing binding information.
# Exercise the file under ldd(1), capturing all the bindings.
@Ldd = split(/\n/,
# If ldd isn't -e capable we'll get a usage message. The -e option was
# introduced in Solaris 9 and related patches. Also, make sure the user
# sees any ldd errors.
$NoFound = 0;
"unable to capture bindings output\n"), $Prog;
exit 1;
}
$NoFound = 1;
last;
}
}
# The runtime linker will have appended a process id to the debug file.
# As we have to intuit the name, make sure there is only one debug
# file match, otherwise there must be some clutter in the output
# directory that is going to mess up our analysis.
}
}
if (@DbgGlob == 0) {
# If there is no debug file, bail. This can occur if the file
# being processed is secure.
if ($CmdLine) {
"bindings output - possible secure application?\n"),
}
return 0;
} elsif (@DbgGlob > 1) {
# Too many debug files found.
if ($CmdLine) {
"output files exist: %s: clean up temporary " .
}
return 0;
} else {
}
# Ok, we're ready to process the bindings information. Print a header
# if necessary, and if there were any ldd(1) errors push some of them
# out before any bindings information. Limit the output, as it can
# sometimes be excessive. If there are errors, the bindings information
# is likely to be incomplete.
if ($Mult) {
print STDOUT "$File:\n";
}
if ($NoFound) {
# $Cnt bounds the number of ldd(1) error lines that are echoed.
my ($Cnt) = 4;
print STDOUT "$Line\n";
$Cnt--;
}
if ($Cnt == 0) {
last;
}
}
}
# If the user wants the original debugging file left behind, rename it
# so that it doesn't get re-read by another instance of lari processing
# this file.
if ($opt{s}) {
}
# Now that we've finished with the debugging file, nuke it if necessary.
if (!$opt{s}) {
unlink($DbgFile);
}
# Clear the seed so that an interrupt no longer attempts any cleanup.
$DbgSeed = "";
return 1;
}
# Read the bindings debugging file one line at a time, record the symbols and
# bindings it describes, and then either write mapfiles (-m) or print the
# symbol diagnostics through sort(1).
# NOTE(review): the argument unpacking is not visible in this copy; $File and
# $DbgFile are presumably the sub's parameters - confirm. Many of the
# pattern matches that guard the blocks below are also missing.
sub ProcBindings {
my (%Filtees, $FileHandle);
# Reinitialize our hashes when we're dealing with multiple files.
if ($Mult) {
%Symbols = ();
%Objects = ();
%Versioned = ();
}
# As debugging output can be significant, read a line at a time.
# NOTE(review): two-argument open whose result is not checked; a missing
# or unreadable file would only show up as an empty read loop.
open($FileHandle, "<$DbgFile");
while (defined(my $Line = <$FileHandle>)) {
chomp($Line);
# If we find a relationship between a filter and filtee, save
# it, we'll come back to this once we've gathered everybody's
# symbols.
# Separate the filter and filtee names, ignore the
# runtime linker.
next;
}
next;
}
# If we find a configuration alternative, determine whether it
# is for one of our filtees, and if so record it.
# Separate the original and alternative names.
next;
}
}
}
next;
}
# Collect the symbols from any file analyzed.
GetAllSymbols($1);
next;
}
# Process any symbolic relocations that bind to a file.
my ($BndInfo) = 0;
my ($Offset) = 1;
my ($Dlsym) = 0;
my ($Detail) = 0;
# For greatest flexibility, split the line into fields
# and walk each field until we find what we need.
# The referencing file, "... binding file=".*".
$RefFile = $1;
$Offset++;
last;
}
$Offset++;
}
# The referencing offset, typically this is the address
# of the reference, "(0x1234...)", but in the case of a
# user lookup it's the string "(dlsym)". If we don't
# find this offset information we've been given a debug
# file that didn't use the "detail" token, in which case
# we're not getting all the information we need.
if ($1 eq 'dlsym') {
$Dlsym = 1;
}
$Detail = 1;
$Offset++;
}
# The destination file, "... to file=".*".
$DstFile = $1;
$Offset++;
last;
}
$Offset++;
}
# The symbol being bound, "... symbol `.*' ...".
$SymName = $1;
$Offset++;
last;
}
$Offset++;
}
# Possible trailing binding info, "... (direct,.*)$".
$BndInfo = $1;
$Offset++;
last;
}
$Offset++;
}
if ($Detail == 0) {
"does not contain `detail' information\n"),
return;
}
# Collect the symbols from each object.
# Identify that this definition has been bound to.
# If the reference binds to a definition within
# the same file this symbol may be a candidate
# for reducing to local.
} else {
# This symbol is required to satisfy an external
# reference.
}
# Assign any other state indicated by the binding info
# associated with the diagnostic output.
if (!$BndInfo) {
next;
}
}
}
}
}
}
if ($Dlsym) {
}
}
}
close($FileHandle);
# Now that we've processed all objects, complete any auxiliary filtee
# tagging. For each filtee, determine which of the symbols it exports
# are also defined in its filters. If a filtee is bound to, the
# runtime linker's diagnostics will indicate a filtee binding. However,
# some of the filtee symbols may not be bound to, so here we mark them
# all so as to remove them from any interesting output.
# Standard filters aren't captured at all, as nothing can bind
# to them.
next;
}
# Determine what symbols this filtee offers.
# Ignore the usual reserved stuff.
next;
}
# Determine whether this symbol exists in our filter.
next;
}
$Msft)) {
next;
}
}
}
}
# Process objects and their symbols as required.
if ($opt{m}) {
# If we're creating a mapfile, traverse each object we've
# collected.
# Skip any objects that should be ignored.
next;
}
# Skip any versioned objects if required.
next;
}
# Open the mapfile if required.
if (!open(MAPOUT, "> $Path")) {
exit 1;
}
# Establish the mapfile preamble.
print MAPOUT "#\n# Interface Definition mapfile for:\n";
print MAPOUT "#\tDynamic Object: $Obj\n";
print MAPOUT "#\tProcess: $File\n#\n\n";
# Process each global symbol.
print MAPOUT "$File {\n\tglobal:\n";
# For the first pass we're only interested in
# symbols that have been bound to from an
# external object, or must be global to enable
# a binding to an interposing definition.
# Skip bindings to ourself as these are
# candidates for demoting to local.
next;
}
next;
}
# Add the demangled name as a comment if
# required.
if ($opt{C}) {
if ($DemName ne "") {
print MAPOUT "\t\t#$DemName\n";
}
}
print MAPOUT "\t\t$SymName;\n";
}
# Process each local demotion.
print MAPOUT "\tlocal:\n";
if ($opt{o}) {
foreach my $SymName
# For this pass we're only interested
# in symbol definitions that haven't
# been bound to, or have only been
# bound to from the same object.
next;
}
# Add the demangled name as a comment if
# required.
if ($opt{C}) {
my ($DemName) =
if ($DemName ne "") {
print MAPOUT
"\t\t#$DemName\n";
}
}
print MAPOUT "\t\t$SymName;\n";
}
}
# Capture everything else as local.
print MAPOUT "\t\t\*;\n};\n";
close MAPOUT;
}
} else {
# If we're gathering information regarding the symbols used by
# the process, automatically sort any standard output using the
# symbol name.
# NOTE(review): "sort +1" is the obsolescent key syntax (skip the
# first field); newer sort(1) implementations may need "-k 2" -
# confirm on the target platform.
if (!open(SORT, "| sort +1")) {
$Prog, $!;
exit 1;
}
my ($Cnt);
# If we're looking for interesting symbols, inspect
# each definition of each symbol. If one is found to
# be interesting, the whole family are printed.
next;
}
# We've found something interesting, or all symbols
# should be output. List all objects that define this
# symbol.
my ($Str) = "$Cnt:";
# Do we just want overhead symbols? Consider
# copy-relocations, and plt address binding,
# as overhead too.
next;
}
# Do we just want all symbols that have been
# bound to?
next;
}
# If we haven't been asked for all symbols, only
# print those reserved symbols that have been
# bound to, as the number of reserved symbols
# can be quite excessive.
next;
}
# Skip any versioned objects if required.
next;
}
# Display this symbol.
} else {
}
# Has the symbol been bound to externally
}
# Has the symbol been bound to from the same
# object.
}
# Has the symbol been bound to directly.
}
# Does this symbol originate from an explicit
# interposer.
}
# Is this symbol the reference data of a copy
# relocation.
}
# Is this symbol part of a filtee.
}
# Is this symbol protected (in which case there
# may be a symbolic binding within the same
# object to this symbol).
}
# Is this symbol an executable's .plt address.
}
# Does this binding originate from a user
# (dlsym) request.
}
# Does this definition redirect the binding.
}
# Does this definition explicitly define no
# direct binding.
}
# Determine whether this is a function or a data
# object. For the latter, display the symbol
# size. Otherwise, the symbol is a reserved
# label, and is left untyped.
$Type = '()';
$Type = '[' .
']';
} else {
$Type = "";
}
# Demangle the symbol name if desired.
# When multiple files are processed the line is indented
# by one space beneath the per-file header.
if ($Mult) {
print SORT " [$Str]: " .
"$SymName$Type$DemName: $Obj\n";
} else {
print SORT "[$Str]: " .
"$SymName$Type$DemName: $Obj\n";
}
}
}
close SORT;
}
}
# Heuristics to determine whether a symbol binding is interesting. In most
# applications there can be a large amount of symbol binding information to
# wade through. The most typical binding, to a single definition, probably
# isn't interesting or the cause of unexpected behavior. Here, we try to
# determine those bindings that may cause unexpected behavior.
#
# Note, this routine is actually called for all symbols so that their count
# can be calculated in one place.
#
#	$SymName	name of the symbol to evaluate
#
# Returns the number of definitions counted for the symbol ($ObjCnt) when the
# symbol should be reported, or 0 when it should not.
# NOTE(review): the loop over the symbol's definitions and the conditions
# guarding each counter and each early return are not visible in this copy.
sub Interesting
{
my ($SymName) = @_;
# Scan all definitions of this symbol, thus determining the definition
# count, the number of filters, redirections, executable references
# (copy-relocations, or plt addresses), no-direct bindings, and the
# number of definitions that have been bound to.
# Ignore standard filters when determining the symbol count, as
# a standard filter can never be bound to.
$ObjCnt++;
}
$FltCnt++;
}
$NodiCnt++;
}
$ExRef++;
}
$RdirCnt++;
}
# Ignore bindings to undefined .plts, and copy-relocation
# references. These are implementation details, rather than
# a truly interesting multiple-binding. If a symbol is tagged
# as protected, count it as having bound to itself, even though
# we can't tell if it's really been used.
$BndCnt++;
}
}
# If we want all overhead symbols, return the count.
if ($opt{o}) {
return $ObjCnt;
}
# If we want all symbols, return the count. If we want all bound
# symbols, return the count provided it is non-zero.
return $ObjCnt;
}
# Single instance symbol definitions aren't very interesting.
if ($ObjCnt == 1) {
return 0;
}
# Traverse each symbol definition looking for the following:
#
# . Multiple symbols are bound to externally.
# . A symbol is bound to externally, and possibly symbolically.
#
# Two symbol bindings are acceptable in some cases, and thus aren't
# interesting:
#
# . Copy relocations. Here, the executable binds to a shared object
# to access the data definition, which is then copied to the
# executable. All other references should then bind to the copied
# data.
# . Non-plt relocations to functions that are referenced by the
# executable will bind to the .plt in the executable. This
# provides for address comparison calculations (although plainly
# an overhead).
#
# Multiple symbol bindings are acceptable in some cases, and thus aren't
# interesting:
#
# . Filtees. Multiple filtees may exist for one filter.
#
return 0;
}
# Only display any reserved symbols if more than one binding has
# occurred.
($BndCnt < 2)) {
return (0);
}
# For all other symbols, determine whether a binding has occurred.
# Note: definitions within an executable are tagged as protected ("P")
# as they may have been bound to from within the executable - we can't
# tell.
return (0);
}
# Multiple instances of a definition, where all but one are filters.
# Effectively, only one symbol is providing the final binding.
return (0);
}
# Multiple instances of explicitly defined no-direct binding symbols
# are known to occur, and their no-binding definition indicates they
# are expected and accounted for. Thus, these aren't interesting.
return (0);
}
# We have an interesting symbol, return its count.
return $ObjCnt;
}
# Obtain the global symbol definitions of an object and determine whether the
# object has been versioned.
#
#	$Obj	path of the object to inspect
#
# The object's dynamic symbol table and symbol information table are read
# from elfdump(1) through pipes, with LC_ALL=C set for the command.
# NOTE(review): $Obj is interpolated into a shell command line inside single
# quotes, so a path containing a single quote would break the quoting. The
# results of the pipe opens are also not checked. Many of the conditions and
# assignments in this routine are not visible in this copy.
sub GetAllSymbols {
my ($Obj) = @_;
my (%AddrToName, %NameToAddr);
my ($Vers) = 0;
my ($Symb) = 0;
my ($Copy) = 0;
my ($Interpose) = 0;
my ($Fltr) = 0;
# Determine whether we've already retrieved this object's symbols.
# Also, ignore the runtime linker, it's on a separate link-map, and
# except for the filtee symbols that might be bound via libdl, is
# uninteresting. Tag the runtime linker as versioned to simplify
# possible -v processing.
return;
}
return;
}
# Get the dynamic information.
# If there's no information, it's possible we've been given a debug
# output file and are processing it from a location from which the
# dependencies specified in the debug file aren't accessible.
if (!@Elfd) {
# Add the file to our list, so that we don't create the same
# message again. Processing should continue so that we can
# flush out as many error messages as possible.
return;
}
# If we're processing a filter there's no need to save any symbols, as
# no bindings will occur to this object.
#
# Determine whether we've got a symbolically bound object. With newer
# linkers all symbols will be marked as protected ("P"), but with older
# linkers this state could only be intuited from the symbolic dynamic
# tag.
my (@Fields);
# Determine if the FILTER tag is set.
if ($#Fields == 3) {
next;
}
next;
}
next;
}
# We're only interested in the FLAGS entry.
if (($#Fields < 4) || ($Fields[1] !~ "^FLAGS")) {
next;
}
$Symb = 1;
next;
}
$Interpose = 1;
}
}
# If this file is a dynamic executable, determine if this object has
# any copy relocations so that any associated bindings can be labeled
# more meaningfully.
if ($Type =~ /executable/) {
$Exec = 1;
# Obtain any copy relocations.
next;
}
# Intel relocation records don't contain an addend,
# whereas every other supported platform does.
} else {
}
$Copy = 1;
}
} else {
$Exec = 0;
}
# Obtain the dynamic symbol table for this object. Symbol tables can
# be quite large, so open the elfdump command through a pipe.
open($FileHandle, "LC_ALL=C elfdump -sN.dynsym '$Obj' 2> /dev/null |");
# Now process all symbols.
while (defined(my $Line = <$FileHandle>)) {
chomp($Line);
my ($Flags);
# We're only interested in defined non-reserved symbol entries.
# Note, ABS and NOTY symbols of non-zero size have been known to
# occur, so capture them.
if (($#Fields < 8) || ($Fields[4] !~ $GlobWeak) ||
next;
}
# If we've found copy relocations, save the address and names
# of any OBJT definitions, together with the copy symbol.
}
}
# If the symbol visibility is protected, this is an internal
# symbolic binding (NOTE, an INTERNAL visibility for a global
# symbol is invalid, but for a while ld(1) was setting this
# attribute mistakenly for protected).
# If this is a dynamic executable, mark its symbols as protected
# (they can't be interposed on any more than symbols defined
# protected within shared objects).
}
# If this object is marked as an interposer, tag each symbol.
if ($Interpose) {
}
# Identify the symbol as a function or data type, and for the
# latter, capture the symbol size. Ignore the standard
# symbolic labels, as we don't want to type them.
if (oct($Size) eq 0) {
$Size = "0";
} else {
}
}
}
# If the version field is non-null this object has already been
# versioned.
$Vers = 1;
}
}
close($FileHandle);
# Obtain any symbol information table for this object. Symbol tables can
# be quite large, so open the elfdump command through a pipe.
open($FileHandle, "LC_ALL=C elfdump -y '$Obj' 2> /dev/null |");
# Now process all symbols.
while (defined(my $Line = <$FileHandle>)) {
chomp($Line);
my ($Flags) = 0;
# Binding attributes are in the second column.
if ($#Fields < 1) {
next;
}
if ($Fields[1] =~ /N/) {
}
if ($Fields[1] =~ /F/) {
}
if ($Fields[1] =~ /A/) {
}
# Determine the symbol name based upon the number of fields.
}
}
close($FileHandle);
# If this symbol has already been marked as a copy-relocation reference,
# see if this symbol has any aliases, which should also be marked.
if ($Copy) {
foreach my $SymName (keys(%NameToAddr)) {
# Determine all symbols that have the same address.
next;
}
}
}
}
}
# Demangle a symbol name if required.
#
#	$SymName	symbol name to demangle
#
# Returns the demangled name rewritten as " [name]" when one is produced,
# otherwise an empty string. Nothing is attempted unless -C is in effect.
# NOTE(review): the cache lookup, the demangler invocation and the loop over
# its output are not visible in this copy.
sub Demangle
{
my ($SymName) = @_;
my ($DemName);
if ($opt{C}) {
my (@Dem);
# Determine if we've already demangled this name.
}
if (($#Fields < 2) || ($Fields[1] ne '==') ||
next;
}
# Keep only the text following "== " and wrap it in brackets, with a
# leading space so it can be appended directly to the symbol name.
$DemName =~ s/.*== (.*)$/ \[$1]/;
return($DemName);
}
}
return("");
}