appcert.pl revision 7c478bd95313f5f23a4c958a745db2134aa03244
#!/usr/perl5/bin/perl -w
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License, Version 1.0 only
# (the "License"). You may not use this file except in compliance
# with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# ident "%Z%%M% %I% %E% SMI"
#
# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# This is the top level script for performing the appcert checks. It
# reads the command line options, determines list of binaries to check,
# and then calls symprof (the raw symbol profiler), symcheck (that
# checks for unstable behavior), and symreport (that constructs and
# outputs a rollup report)
#
require 5.005;
use strict;
use locale;
use Getopt::Std;
use POSIX qw(locale_h);
use Sun::Solaris::Utils qw(textdomain gettext);
use File::Basename;
use File::Path;
use lib qw(/usr/lib/abi/appcert);
use AppcertUtil;
setlocale(LC_ALL, "");
textdomain(TEXT_DOMAIN);
use vars qw(
@item_list
$file_list
$do_not_follow_symlinks
$modify_ld_path
$append_solaris_dirs_to_ld_path
$skipped_count
);
my $caught_signal = 0;
my $record_binary_call_count = 0;
# The directory where the appcert specific scripts and data reside:
$appcert_lib_dir = "/usr/lib/abi/appcert";
set_clean_up_exit_routine(\&clean_up_exit);
signals('on', \&interrupted);
get_options();
@item_list = @ARGV; # List of directories and/or objects to check.
check_item_list();
set_working_dir();
find_binaries(); # Records all of the binary objects to check.
supplement_ld_library_path();
export_vars_to_environment(); # Exports info for our child scripts to use.
run_profiler(); # Run the script symprof.
run_checker(); # Run script symcheck.
run_report_generator(); # Run the script symreport.
my $rc = overall_result_code();
clean_up();
exit $rc;
#
# This subroutine calls getopts() and sets up variables reflecting how
# we were called.
#
sub get_options
{
my %opt;
getopts('?hnLBSw:f:', \%opt) || (show_usage() && exiter(2));
if (exists($opt{'?'}) || exists($opt{'h'})) {
show_usage();
exiter(2);
}
if (exists($opt{'f'})) {
$file_list = $opt{'f'};
} else {
$file_list = '';
}
if (exists($opt{'w'})) {
$working_dir = $opt{'w'};
} else {
$working_dir = '';
}
if ($working_dir =~ /'/) {
#
# This character will ultimately cause problems with
# system() and pipelines so we exit now.
#
exiter(sprintf(gettext(
"directory contains the single-quote character ': %s\n"),
$working_dir));
}
if (defined($opt{'B'})) {
$batch_report = 1;
} else {
$batch_report = 0;
}
if (defined($opt{'n'})) {
$do_not_follow_symlinks = 1;
} else {
$do_not_follow_symlinks = 0;
}
if (defined($opt{'L'})) {
$modify_ld_path = 0;
} else {
$modify_ld_path = 1;
}
if (defined($opt{'S'})) {
$append_solaris_dirs_to_ld_path = 1;
} else {
$append_solaris_dirs_to_ld_path = 0;
}
}
#
# Performs an initial check to see if the user supplied anything at all
# to check. Also reads in the file list if the user supplied one via -f <file>
#
sub check_item_list
{
# Add the items if the -f flag was used.
if ($file_list) {
my $file;
my $list_fh = do { local *FH; *FH };
if (-f $file_list && open($list_fh, "<$file_list")) {
while (<$list_fh>) {
chomp($file = $_);
push(@item_list, $file);
}
close($list_fh);
} else {
exiter(nofile($file_list, $!));
}
}
return if (@item_list);
emsg("$command_name: " . gettext(
"at least one file or directory to check must be specified.") .
"\n\n");
show_usage();
exiter(3);
}
#
# This subroutine sets up the working directory, the default something
# like: /tmp/appcert.<PID>
#
sub set_working_dir
{
if ($working_dir) {
# working_dir has been set in get_options().
if (! -d $working_dir) {
if (! mkpath($working_dir) || ! -d $working_dir) {
exiter(nocreatedir($working_dir, $!));
}
} else {
if (! dir_is_empty($working_dir)) {
# create a subdir of it for our use.
$working_dir = create_tmp_dir($working_dir);
}
}
} else {
# Default case: will create, e.g., /tmp/appcert.12345
$working_dir = create_tmp_dir();
}
if (! -d $working_dir) {
# We have no working directory.
exiter(nocreatedir($working_dir));
}
#
# Create a subdirectory of working_dir that will contain all of
# the object subdirs.
#
my $dir = "$working_dir/$object_dir";
if (! mkpath($dir) || ! -d $dir) {
exiter(nocreatedir($dir, $!));
}
#
# Make a tmp subdirectory for small temporary work. It is
# preferred to have it on tmpfs (especially not NFS) for
# performance reasons.
#
$tmp_dir = "/tmp/${command_name}_tmp.$$";
if (-d $tmp_dir) {
exiter(nocreatedir("$tmp_dir", $!));
}
if (! mkpath($tmp_dir, 0, 0700) || ! -d $tmp_dir) {
emsg("%s", nocreatedir($tmp_dir, $!));
# fall back to our output dir (which could have slow access)
$tmp_dir = "$working_dir/tmp";
if (! mkpath($tmp_dir)) {
exiter(nocreatedir($tmp_dir, $!));
}
}
if (! -d $tmp_dir) {
exiter(nocreatedir($tmp_dir, $!));
}
}
#
# Top level function to find all the binaries to be checked. Calls
# record_binary() to do the actual deciding and recording.
#
# The array @item_list contains all the items to find.
#
sub find_binaries
{
$binary_count = 0;
my $skipped_file = "$working_dir/Skipped";
my $skipped_fh = do { local *FH; *FH };
open($skipped_fh, ">$skipped_file") ||
exiter(nofile($skipped_file, $!));
$skipped_count = 0;
my ($item, $args, $file);
emsg("\n" . gettext(
"finding executables and shared libraries to check") . " ...\n");
$args = '';
$args .= '-follow ' unless ($do_not_follow_symlinks);
$args .= '-type f -print';
my $quote_fmt = gettext(
"skipping: item contains the single-quote character ': %s\n");
foreach $item (@item_list) {
if (! -e $item) {
emsg(gettext("skipping: %s: %s\n"), $item, $!);
print $skipped_fh "$item: no_exist\n";
$skipped_count++;
next;
} elsif ($item =~ /'/) {
emsg($quote_fmt, $item);
print $skipped_fh "$item: item_has_bad_char\n";
$skipped_count++;
next;
}
# note that $item does not contain a single-quote.
my $find_fh = do { local *FH; *FH };
open($find_fh, "$cmd_find '$item' $args|") ||
exiter(norunprog("$cmd_find '$item' $args", $!));
while (<$find_fh>) {
chomp($file = $_);
#
# We are free to remove leading "./". This will
# minimize directory names we create that would
# start with a dot.
#
$file =~ s,^\./,,;
next if ($file eq '');
record_binary($file, $skipped_fh);
}
close($find_fh);
}
if ($binary_count == 0) {
exiter("$command_name: " . gettext(
"no checkable binary objects were found."), 3);
}
if ($skipped_count == 0) {
print $skipped_fh "# NO_FILES_WERE_SKIPPED\n";
}
close($skipped_fh);
}
#
# This subroutine will determine if a binary is checkable.
#
# If so, it will reserve a directory for its output in the $working_dir
# location, and store the output of a number of commands there.
#
sub record_binary
{
my ($file, $skipped_fh) = @_;
if ((++$record_binary_call_count % 500) == 0) {
#
# This indicates are being called many times for a large
# product. Clear out our caches.
#
purge_caches();
}
#
# Check if the object exists and is regular file. Note that
# this test also passes a symlink as long as that symlink
# ultimately refers to a regular file.
#
if (! -f $file) {
emsg(gettext("skipping: not a file: %s\n"), $file);
print $skipped_fh "$file: not_a_file\n";
$skipped_count++;
return 0;
}
# Check if it is readable:
if (! -r $file) {
emsg(gettext("skipping: cannot read: %s\n"), $file);
print $skipped_fh "$file: unreadable\n";
$skipped_count++;
return 0;
}
#
# Since the filename will be used as operands passed to utility
# commands via the shell, we exclude at the outset certain meta
# characters in the filenames.
#
my $quote_fmt = gettext(
"skipping: filename contains the single-quote character: ': %s\n");
if ($file =~ /'/) {
emsg($quote_fmt, $file);
print $skipped_fh "$file: filename_has_bad_char\n";
$skipped_count++;
return 0;
}
my $newline_fmt = gettext(
"skipping: filename contains the newline character: \\n: %s\n");
if ($file =~ /\n/) {
emsg($newline_fmt, $file);
print $skipped_fh "$file: filename_has_bad_char\n";
$skipped_count++;
return 0;
}
my $pipe_fmt = gettext(
"skipping: filename contains the pipe character: \|: %s\n");
if ($file =~ /\|/) {
emsg($pipe_fmt, $file);
print $skipped_fh "$file: filename_has_bad_char\n";
$skipped_count++;
return 0;
}
my $file_output;
# Run the file(1) command on it.
c_locale(1);
# note that $file does not contain a single-quote.
$file_output = `$cmd_file '$file' 2>/dev/null`;
c_locale(0);
if ($file_output =~ /script$/) {
$file_output =~ s/:\s+/: /;
$file_output =~ s/: /: script /;
print $skipped_fh "$file_output";
#
# again now without the c_locale() setting:
# note that $file does not contain a single-quote.
#
$file_output = `$cmd_file '$file' 2>/dev/null`;
$file_output =~ s/:\s+/: /;
emsg(gettext("skipping: %s"), $file_output);
$skipped_count++;
return 0;
}
# create ELF and a.out matching regex:
my $object_match =
'ELF.*executable.*dynamically' . '|' .
'ELF.*dynamic lib' . '|' .
'ELF.*executable.*statically' . '|' .
'Sun demand paged SPARC.*dynamically linked' . '|' .
'Sun demand paged SPARC executable' . '|' .
'pure SPARC executable' . '|' .
'impure SPARC executable';
#
# Note that we let the "statically linked" binaries through
# here, but will catch them later in the profiler and checker.
#
if ($file_output !~ /$object_match/io) {
# it is not an ELF object file and so does not interest us.
return 0;
}
my $exec_fmt = gettext(
"skipping: must have exec permission to be checked: %s\n");
if (! -x $file) {
#
# It interests us, but the execute bit not set. Shared
# objects will be let through here since ldd will still
# work on them (since it uses lddstub). Otherwise, we
# cannot check it.
#
if (! is_shared_object($file)) {
# warn the user exec bit should be set:
emsg($exec_fmt, $file);
print $skipped_fh "$file: no_exec_permission\n";
$skipped_count++;
return 0;
}
}
#
# Rather than let ldd fail later on in symprof, we check the
# arch here to make sure it matches $uname_p. If it does not
# match, we anticipate a 64-bit application and so we
# immediately test how ldd will handle it (kernel might be
# 32-bit, etc).
#
my ($arch, $type, $wordsize, $endian, $e_machine) = bin_type($file);
if ($arch !~ /^${uname_p}$/io) {
my ($ldd_output, $ldd_output2);
#
# Now run ldd on it to see how things would go. If it
# fails we must skip it.
#
c_locale(1);
# note that $file does not contain single-quote
$ldd_output = `$cmd_ldd '$file' 2>&1 1>/dev/null`;
c_locale(0);
if ($? != 0) {
# note that $file does not contain a single-quote
$ldd_output2 = `$cmd_ldd '$file' 2>&1 1>/dev/null`;
$ldd_output =~ s/\n.*$//;
$ldd_output2 =~ s/\n.*$//;
if ($ldd_output !~ /wrong class/) {
$ldd_output = "$file: " . sprintf(
gettext("ldd failed for arch: %s"), $arch);
$ldd_output2 = $ldd_output;
} else {
$ldd_output .= " ($arch)";
$ldd_output2 .= " ($arch)";
}
$ldd_output =~ s/:\s+/: /;
$ldd_output2 =~ s/:\s+/: /;
emsg(gettext("skipping: %s\n"), $ldd_output2);
$ldd_output =~ s/: /: ldd_failed /;
print $skipped_fh "$ldd_output\n";
$skipped_count++;
return 0;
}
}
# From this point on, object is one we decided to check.
# Create the directory name for this object:
my $dirname = object_to_dir_name($file);
my $dirpath = "$working_dir/$dirname";
my $early_fmt = gettext(
"skipping: %s referenced earlier on the command line\n");
if (-e $dirpath) {
#
# Directory already exists. We assume this means the
# user listed it twice (possibly indirectly via "find").
#
emsg($early_fmt, $file);
return 0;
}
if (! mkdir($dirpath, 0777)) {
exiter(nocreatedir($dirpath, $!));
}
$binary_count++;
# Record binary object's location:
my $path_fh = do { local *FH; *FH };
open($path_fh, ">$dirpath/info.path") ||
exiter(nofile("$dirpath/info.path", $!));
print $path_fh $file, "\n";
close($path_fh);
#
# Record /usr/bin/file output. Note that the programmatical way
# to access this info is through the command cmd_output_file().
#
my $file_fh = do { local *FH; *FH };
open($file_fh, ">$dirpath/info.file") ||
exiter(nofile("$dirpath/info.file", $!));
print $file_fh $file_output;
close($file_fh);
#
# Record dump -Lv output. Note that the programmatical way to
# access this info is through the command cmd_output_dump().
#
my $dump_fh = do { local *FH; *FH };
open($dump_fh, ">$dirpath/info.dump") ||
exiter(nofile("$dirpath/info.dump", $!));
my $dump_output;
c_locale(1);
# note that $file does not contain a single-quote
$dump_output = `$cmd_dump -Lv '$file' 2>&1`;
c_locale(0);
print $dump_fh $dump_output;
close($dump_fh);
#
# Record arch and etc binary type.
#
my $arch_fh = do { local *FH; *FH };
open($arch_fh, ">$dirpath/info.arch") ||
exiter(nofile("$dirpath/info.arch", $!));
if ($arch eq 'unknown') {
my $tmp = $file_output;
chomp($tmp);
emsg(gettext("warning: cannot determine arch: %s\n"), $tmp);
}
print $arch_fh "ARCH: $arch\n";
print $arch_fh "TYPE: $type\n";
print $arch_fh "WORDSIZE: $wordsize\n";
print $arch_fh "BYTEORDER: $endian\n";
print $arch_fh "E_MACHINE: $e_machine\n";
close($arch_fh);
# Record the file -> directory name mapping in the index file.
my $index_file = "$working_dir/Index";
my $index_fh = do { local *FH; *FH };
open($index_fh, ">>$index_file") ||
exiter(nofile($index_file, $!));
print $index_fh "$file => $dirname\n";
close($index_fh);
return 1;
}
#
# Prints the usage statement to standard out.
#
sub show_usage
{
emsg(gettext(
"usage: appcert [ -nBLS ] [ -f file ] [ -w dir ] { obj | dir } ...\n" .
" Examine binary object files for use of private Solaris\n" .
" interfaces, unstable use of static linking, and other\n" .
" unstable practices.\n")
);
}
#
# Examines the set of binaries to be checked and notes which ones are
# shared libraries. Constructs a LD_LIBRARY_PATH that would find ALL of
# these shared objects. The new directories are placed at the END of the
# current LD_LIBRARY_PATH (if any).
#
sub supplement_ld_library_path
{
my (@orig, @add_product, @add_solaris, %ldpath);
# First, note the current LD_LIBRARY_PATH parts:
my $dirname;
if (defined($ENV{'LD_LIBRARY_PATH'})) {
foreach $dirname (split(/:/, $ENV{'LD_LIBRARY_PATH'})) {
if (! exists($ldpath{$dirname})) {
push(@orig, $dirname);
$ldpath{$dirname} = 1;
}
}
}
# Next, search for ELF shared objects.
my ($dir, $path);
if ($modify_ld_path) {
while (defined($dir = next_dir_name())) {
$path = dir_name_to_path($dir);
$dirname = dirname($path);
next if (exists($ldpath{$dirname}));
#
# A colon ":" in directory name is cannot be
# accepted because that is the LD_LIBRARY_PATH
# separator.
#
next if ($dirname =~ /:/);
if (is_shared_object($path)) {
if (! exists($ldpath{$dirname})) {
push(@add_product, $dirname);
$ldpath{$dirname} = 1;
}
}
}
}
if ($append_solaris_dirs_to_ld_path) {
foreach $dirname (split(/:/, $solaris_library_ld_path)) {
if (! exists($ldpath{$dirname})) {
push(@add_solaris, $dirname);
$ldpath{$dirname} = 1;
}
}
}
# modify the LD_LIBRARY_PATH:
if (@add_product || @add_solaris) {
$ENV{'LD_LIBRARY_PATH'} =
join(':', (@orig, @add_product, @add_solaris));
}
emsg("\n");
if (@add_product) {
emsg(gettext(
"Shared libraries were found in the application and the\n" .
"following directories are appended to LD_LIBRARY_PATH:\n"
) . "\n");
foreach $dir (@add_product) {
$dir = "./$dir" unless ($dir =~ m,^/,);
emsg(" $dir\n");
}
emsg("\n");
}
if (@add_solaris) {
emsg(gettext(
"These Solaris library directories are being appended\n" .
"to LD_LIBRARY_PATH:\n") . "\n");
foreach $dir (@add_solaris) {
emsg(" $dir\n");
}
emsg("\n");
}
}
#
# Everything is correctly exported by now, and so we just run "symprof".
# It is run in batches of $block_size binaries to minimize the effect of
# memory usage caused by huge binaries in the product to be checked.
#
sub run_profiler
{
my $block_size = 20;
my $i = 0;
# record old values of the blocks (if any)
my $env_min = $ENV{'AC_BLOCK_MIN'};
my $env_max = $ENV{'AC_BLOCK_MAX'};
while ($i < $binary_count) { # do each block
# export our symprof values of the block limits
$ENV{'AC_BLOCK_MIN'} = $i;
$ENV{'AC_BLOCK_MAX'} = $i + $block_size;
run_symprof();
$i += $block_size;
}
# restore old values of the blocks (if any)
if (defined($env_min)) {
$ENV{'AC_BLOCK_MIN'} = $env_min;
} else {
delete $ENV{'AC_BLOCK_MIN'};
}
if (defined($env_max)) {
$ENV{'AC_BLOCK_MAX'} = $env_max;
} else {
delete $ENV{'AC_BLOCK_MAX'};
}
}
#
# Sub that actually runs "symprof".
#
sub run_symprof
{
system("$appcert_lib_dir/symprof");
if ($? != 0) {
emsg("%s", utilityfailed("symprof"));
clean_up_exit(1);
}
}
#
# Sub to run "symcheck".
#
sub run_checker
{
system("$appcert_lib_dir/symcheck");
if ($? != 0) {
emsg("%s", utilityfailed("symcheck"));
clean_up_exit(1);
}
}
#
# Sub to run "symreport".
#
sub run_report_generator
{
system("$appcert_lib_dir/symreport");
if ($? != 0) {
emsg("%s", utilityfailed("symreport"));
clean_up_exit(1);
}
}
#
# General routine to be called if one of our utility programs (symprof,
# symcheck, symreport) failed (that is, return != 0). returns the
# formatted error message string to pass to the user.
#
sub utilityfailed
{
my ($prog) = @_;
my $fmt;
$fmt = "\n *** " . gettext("utility program failed: %s\n");
return sprintf($fmt, $prog);
}
#
# Does the cleanup and then exits with return code $rc. The utility
# subroutine exiter() will call this subroutine. No general cleanup is
# performed if exiting with error ($rc > 0) so that the user can examine
# at the output files, etc.
#
sub clean_up_exit
{
my ($rc) = @_;
if ($rc != 0) {
working_dir_msg();
} else {
clean_up();
}
exit $rc;
}
#
# General cleanup routine.
#
sub clean_up
{
if (-d $tmp_dir && ($tmp_dir !~ m,^/+$,)) {
rmdir($tmp_dir);
}
}
#
# Routine that is called when an error has occurred. It indicates to
# user where the working and/or temporary directory is and that they are
# not being removed.
#
sub working_dir_msg
{
my @dirlist;
emsg("\n");
if (defined($working_dir) && -d $working_dir) {
push(@dirlist, $working_dir);
}
if (defined($tmp_dir) && -d $tmp_dir) {
push(@dirlist, $tmp_dir);
}
return if (! @dirlist);
emsg(gettext(
"Note that the temporary working directories still exist:") .
"\n\n");
my $dir;
# show the user explicitly which directories remains:
foreach $dir (@dirlist) {
system($cmd_ls, '-ld', $dir);
}
emsg("\n");
}
#
# Signal handler for interruptions (E.g. Ctrl-C SIGINT).
#
sub interrupted
{
$SIG{$_[0]} = 'IGNORE';
exit 1 if ($caught_signal);
$caught_signal = 1;
signals('off');
emsg("\n** " . gettext("interrupted") . " **\n");
clean_up_exit(1);
}