appcert/scripts/appcert.pl

	appcert.pl revision 7c478bd95313f5f23a4c958a745db2134aa03244
#!/usr/perl5/bin/perl -w
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License, Version 1.0 only
# (the "License").  You may not use this file except in compliance
# with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# ident "%Z%%M% %I% %E% SMI"
#
# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

#
# This is the top level script for performing the appcert checks.  It
# reads the command line options, determines list of binaries to check,
# and then calls symprof (the raw symbol profiler), symcheck (that
# checks for unstable behavior), and symreport (that constructs and
# outputs a rollup report)
#

require 5.005;
use strict;
use locale;
use Getopt::Std;
use POSIX qw(locale_h);
use Sun::Solaris::Utils qw(textdomain gettext);
use File::Basename;
use File::Path;

use lib qw(/usr/lib/abi/appcert);
use AppcertUtil;

setlocale(LC_ALL, "");
textdomain(TEXT_DOMAIN);

use vars qw(
    @item_list
    $file_list
    $do_not_follow_symlinks
    $modify_ld_path
    $append_solaris_dirs_to_ld_path
    $skipped_count
);

my $caught_signal = 0;
my $record_binary_call_count = 0;

# The directory where the appcert specific scripts and data reside:
$appcert_lib_dir = "/usr/lib/abi/appcert";

set_clean_up_exit_routine(\&clean_up_exit);

signals('on', \&interrupted);

get_options();

@item_list = @ARGV;     # List of directories and/or objects to check.
check_item_list();

set_working_dir();

find_binaries();        # Records all of the binary objects to check.

supplement_ld_library_path();

export_vars_to_environment();   # Exports info for our child scripts to use.

run_profiler();         # Run the script symprof.

run_checker();          # Run script symcheck.

run_report_generator();     # Run the script symreport.

my $rc = overall_result_code();

clean_up();

exit $rc;


#
# This subroutine calls getopts() and sets up variables reflecting how
# we were called.
#
sub get_options
{
    my %opt;

    getopts('?hnLBSw:f:', \%opt) || (show_usage() && exiter(2));

    if (exists($opt{'?'}) || exists($opt{'h'})) {
        show_usage();
        exiter(2);
    }

    if (exists($opt{'f'})) {
        $file_list = $opt{'f'};
    } else {
        $file_list = '';
    }

    if (exists($opt{'w'})) {
        $working_dir = $opt{'w'};
    } else {
        $working_dir = '';
    }
    if ($working_dir =~ /'/) {
        #
        # This character will ultimately cause problems with
        # system() and pipelines so we exit now.
        #
        exiter(sprintf(gettext(
            "directory contains the single-quote character ': %s\n"),
            $working_dir));
    }

    if (defined($opt{'B'})) {
        $batch_report = 1;
    } else {
        $batch_report = 0;
    }

    if (defined($opt{'n'})) {
        $do_not_follow_symlinks = 1;
    } else {
        $do_not_follow_symlinks = 0;
    }

    if (defined($opt{'L'})) {
        $modify_ld_path = 0;
    } else {
        $modify_ld_path = 1;
    }

    if (defined($opt{'S'})) {
        $append_solaris_dirs_to_ld_path = 1;
    } else {
        $append_solaris_dirs_to_ld_path = 0;
    }
}

#
# Performs an initial check to see if the user supplied anything at all
# to check.  Also reads in the file list if the user supplied one via -f <file>
#
sub check_item_list
{
    # Add the items if the -f flag was used.
    if ($file_list) {
        my $file;
        my $list_fh = do { local *FH; *FH };
        if (-f $file_list && open($list_fh, "<$file_list")) {
            while (<$list_fh>) {
                chomp($file = $_);
                push(@item_list, $file);
            }
            close($list_fh);
        } else {
            exiter(nofile($file_list, $!));
        }
    }

    return if (@item_list);

    emsg("$command_name: " . gettext(
        "at least one file or directory to check must be specified.") .
        "\n\n");

    show_usage();
    exiter(3);
}

#
# This subroutine sets up the working directory, the default something
# like: /tmp/appcert.<PID>
#
sub set_working_dir
{
    if ($working_dir) {
        # working_dir has been set in get_options().
        if (! -d $working_dir) {
            if (! mkpath($working_dir) || ! -d $working_dir) {
                exiter(nocreatedir($working_dir, $!));
            }
        } else {
            if (! dir_is_empty($working_dir)) {
                # create a subdir of it for our use.
                $working_dir = create_tmp_dir($working_dir);
            }
        }
    } else {
        # Default case: will create, e.g., /tmp/appcert.12345
        $working_dir = create_tmp_dir();
    }

    if (! -d $working_dir) {
        # We have no working directory.
        exiter(nocreatedir($working_dir));
    }

    #
    # Create a subdirectory of working_dir that will contain all of
    # the object subdirs.
    #
    my $dir = "$working_dir/$object_dir";
    if (! mkpath($dir) || ! -d $dir) {
        exiter(nocreatedir($dir, $!));
    }
    #
    # Make a tmp subdirectory for small temporary work. It is
    # preferred to have it on tmpfs (especially not NFS) for
    # performance reasons.
    #
    $tmp_dir = "/tmp/${command_name}_tmp.$$";
    if (-d $tmp_dir) {
        exiter(nocreatedir("$tmp_dir", $!));
    }
    if (! mkpath($tmp_dir, 0, 0700) || ! -d $tmp_dir) {
        emsg("%s", nocreatedir($tmp_dir, $!));
        # fall back to our output dir (which could have slow access)
        $tmp_dir = "$working_dir/tmp";
        if (! mkpath($tmp_dir)) {
            exiter(nocreatedir($tmp_dir, $!));
        }
    }

    if (! -d $tmp_dir) {
        exiter(nocreatedir($tmp_dir, $!));
    }
}

#
# Top level function to find all the binaries to be checked.  Calls
# record_binary() to do the actual deciding and recording.
#
# The array @item_list contains all the items to find.
#
sub find_binaries
{
    $binary_count = 0;

    my $skipped_file = "$working_dir/Skipped";
    my $skipped_fh = do { local *FH; *FH };
    open($skipped_fh, ">$skipped_file") ||
        exiter(nofile($skipped_file, $!));

    $skipped_count = 0;

    my ($item, $args, $file);
    emsg("\n" .  gettext(
        "finding executables and shared libraries to check") . " ...\n");

    $args = '';
    $args .= '-follow ' unless ($do_not_follow_symlinks);
    $args .= '-type f -print';

    my $quote_fmt = gettext(
        "skipping:  item contains the single-quote character ': %s\n");

    foreach $item (@item_list) {
        if (! -e $item) {
            emsg(gettext("skipping:  %s: %s\n"), $item, $!);
            print $skipped_fh "$item: no_exist\n";
            $skipped_count++;
            next;
        } elsif ($item =~ /'/)  {
            emsg($quote_fmt, $item);
            print $skipped_fh "$item: item_has_bad_char\n";
            $skipped_count++;
            next;
        }
        # note that $item does not contain a single-quote.
        my $find_fh = do { local *FH; *FH };
        open($find_fh, "$cmd_find '$item' $args|") ||
            exiter(norunprog("$cmd_find '$item' $args", $!));

        while (<$find_fh>) {
            chomp($file = $_);
            #
            # We are free to remove leading "./". This will
            # minimize directory names we create that would
            # start with a dot.
            #
            $file =~ s,^\./,,;

            next if ($file eq '');

            record_binary($file, $skipped_fh);
        }
        close($find_fh);
    }

    if ($binary_count == 0) {
        exiter("$command_name: " . gettext(
            "no checkable binary objects were found."), 3);
    }

    if ($skipped_count == 0) {
        print $skipped_fh "# NO_FILES_WERE_SKIPPED\n";
    }
    close($skipped_fh);
}

#
# This subroutine will determine if a binary is checkable.
#
# If so, it will reserve a directory for its output in the $working_dir
# location, and store the output of a number of commands there.
#
sub record_binary
{
    my ($file, $skipped_fh) = @_;

    if ((++$record_binary_call_count % 500) == 0) {
        #
        # This indicates are being called many times for a large
        # product.  Clear out our caches.
        #
        purge_caches();
    }

    #
    # Check if the object exists and is regular file.  Note that
    # this test also passes a symlink as long as that symlink
    # ultimately refers to a regular file.
    #
    if (! -f $file) {
        emsg(gettext("skipping:  not a file: %s\n"), $file);
        print $skipped_fh "$file: not_a_file\n";
        $skipped_count++;
        return 0;
    }

    # Check if it is readable:
    if (! -r $file) {
        emsg(gettext("skipping:  cannot read: %s\n"), $file);
        print $skipped_fh "$file: unreadable\n";
        $skipped_count++;
        return 0;
    }

    #
    # Since the filename will be used as operands passed to utility
    # commands via the shell, we exclude at the outset certain meta
    # characters in the filenames.
    #
    my $quote_fmt = gettext(
        "skipping:  filename contains the single-quote character: ': %s\n");
    if ($file =~ /'/) {
        emsg($quote_fmt, $file);
        print $skipped_fh "$file: filename_has_bad_char\n";
        $skipped_count++;
        return 0;
    }

    my $newline_fmt = gettext(
        "skipping:  filename contains the newline character: \\n: %s\n");
    if ($file =~ /\n/) {
        emsg($newline_fmt, $file);
        print $skipped_fh "$file: filename_has_bad_char\n";
        $skipped_count++;
        return 0;
    }

    my $pipe_fmt = gettext(
        "skipping:  filename contains the pipe character: \|: %s\n");
    if ($file =~ /\|/) {
        emsg($pipe_fmt, $file);
        print $skipped_fh "$file: filename_has_bad_char\n";
        $skipped_count++;
        return 0;
    }

    my $file_output;

    # Run the file(1) command on it.

    c_locale(1);
    # note that $file does not contain a single-quote.
    $file_output = `$cmd_file '$file' 2>/dev/null`;
    c_locale(0);

    if ($file_output =~ /script$/) {
        $file_output =~ s/:\s+/: /;
        $file_output =~ s/: /: script /;
        print $skipped_fh "$file_output";

        #
        # again now without the c_locale() setting:
        # note that $file does not contain a single-quote.
        #
        $file_output = `$cmd_file '$file' 2>/dev/null`;
        $file_output =~ s/:\s+/: /;
        emsg(gettext("skipping:  %s"), $file_output);
        $skipped_count++;
        return 0;
    }

    # create ELF and a.out matching regex:
    my $object_match =
        'ELF.*executable.*dynamically' . '|' .
        'ELF.*dynamic lib' . '|' .
        'ELF.*executable.*statically' . '|' .
        'Sun demand paged SPARC.*dynamically linked' . '|' .
        'Sun demand paged SPARC executable' . '|' .
        'pure SPARC executable' . '|' .
        'impure SPARC executable';

    #
    # Note that we let the "statically linked" binaries through
    # here, but will catch them later in the profiler and checker.
    #

    if ($file_output !~ /$object_match/io) {
        # it is not an ELF object file and so does not interest us.
        return 0;
    }

    my $exec_fmt = gettext(
        "skipping:  must have exec permission to be checked: %s\n");
    if (! -x $file) {
        #
        # It interests us, but the execute bit not set.  Shared
        # objects will be let through here since ldd will still
        # work on them (since it uses lddstub).  Otherwise, we
        # cannot check it.
        #
        if (! is_shared_object($file)) {
            # warn the user exec bit should be set:
            emsg($exec_fmt, $file);
            print $skipped_fh "$file: no_exec_permission\n";
            $skipped_count++;
            return 0;
        }
    }

    #
    # Rather than let ldd fail later on in symprof, we check the
    # arch here to make sure it matches $uname_p.  If it does not
    # match, we anticipate a 64-bit application and so we
    # immediately test how ldd will handle it (kernel might be
    # 32-bit, etc).
    #
    my ($arch, $type, $wordsize, $endian, $e_machine) = bin_type($file);

    if ($arch !~ /^${uname_p}$/io) {
        my ($ldd_output, $ldd_output2);

        #
        # Now run ldd on it to see how things would go.  If it
        # fails we must skip it.
        #
        c_locale(1);
        # note that $file does not contain single-quote
        $ldd_output = `$cmd_ldd '$file' 2>&1 1>/dev/null`;
        c_locale(0);
        if ($? != 0) {
            # note that $file does not contain a single-quote
            $ldd_output2 = `$cmd_ldd '$file' 2>&1 1>/dev/null`;
            $ldd_output =~ s/\n.*$//;
            $ldd_output2    =~ s/\n.*$//;
            if ($ldd_output !~ /wrong class/) {
                $ldd_output = "$file: " . sprintf(
                    gettext("ldd failed for arch: %s"), $arch);
                $ldd_output2 = $ldd_output;
            } else {
                $ldd_output .= " ($arch)";
                $ldd_output2    .= " ($arch)";
            }
            $ldd_output =~ s/:\s+/: /;
            $ldd_output2    =~ s/:\s+/: /;
            emsg(gettext("skipping:  %s\n"), $ldd_output2);
            $ldd_output =~ s/: /: ldd_failed /;
            print $skipped_fh "$ldd_output\n";
            $skipped_count++;
            return 0;
        }
    }

    # From this point on, object is one we decided to check.

    # Create the directory name for this object:
    my $dirname = object_to_dir_name($file);
    my $dirpath = "$working_dir/$dirname";
    my $early_fmt = gettext(
        "skipping:  %s referenced earlier on the command line\n");
    if (-e $dirpath) {
        #
        # Directory already exists.  We assume this means the
        # user listed it twice (possibly indirectly via "find").
        #
        emsg($early_fmt, $file);
        return 0;
    }

    if (! mkdir($dirpath, 0777)) {
        exiter(nocreatedir($dirpath, $!));
    }

    $binary_count++;

    # Record binary object's location:
    my $path_fh = do { local *FH; *FH };
    open($path_fh, ">$dirpath/info.path") ||
        exiter(nofile("$dirpath/info.path", $!));
    print $path_fh $file, "\n";
    close($path_fh);

    #
    # Record /usr/bin/file output.  Note that the programmatical way
    # to access this info is through the command cmd_output_file().
    #
    my $file_fh = do { local *FH; *FH };
    open($file_fh, ">$dirpath/info.file") ||
        exiter(nofile("$dirpath/info.file", $!));
    print $file_fh $file_output;
    close($file_fh);

    #
    # Record dump -Lv output.  Note that the programmatical way to
    # access this info is through the command cmd_output_dump().
    #
    my $dump_fh = do { local *FH; *FH };
    open($dump_fh, ">$dirpath/info.dump") ||
        exiter(nofile("$dirpath/info.dump", $!));

    my $dump_output;
    c_locale(1);
    # note that $file does not contain a single-quote
    $dump_output = `$cmd_dump -Lv '$file' 2>&1`;
    c_locale(0);
    print $dump_fh $dump_output;
    close($dump_fh);

    #
    # Record arch and etc binary type.
    #
    my $arch_fh = do { local *FH; *FH };
    open($arch_fh, ">$dirpath/info.arch") ||
        exiter(nofile("$dirpath/info.arch", $!));

    if ($arch eq 'unknown') {
        my $tmp = $file_output;
        chomp($tmp);
        emsg(gettext("warning:   cannot determine arch: %s\n"), $tmp);
    }

    print $arch_fh "ARCH: $arch\n";
    print $arch_fh "TYPE: $type\n";
    print $arch_fh "WORDSIZE: $wordsize\n";
    print $arch_fh "BYTEORDER: $endian\n";
    print $arch_fh "E_MACHINE: $e_machine\n";
    close($arch_fh);

    # Record the file -> directory name mapping in the index file.
    my $index_file   = "$working_dir/Index";
    my $index_fh = do { local *FH; *FH };
    open($index_fh, ">>$index_file") ||
        exiter(nofile($index_file, $!));
    print $index_fh "$file => $dirname\n";
    close($index_fh);

    return 1;
}

#
# Prints the usage statement to standard out.
#
sub show_usage
{
    emsg(gettext(
    "usage: appcert [ -nBLS ] [ -f file ] [ -w dir ] { obj | dir } ...\n" .
    "   Examine binary object files for use of private Solaris\n" .
    "   interfaces, unstable use of static linking, and other\n" .
    "   unstable practices.\n")
    );
}

#
# Examines the set of binaries to be checked and notes which ones are
# shared libraries. Constructs a LD_LIBRARY_PATH that would find ALL of
# these shared objects. The new directories are placed at the END of the
# current LD_LIBRARY_PATH (if any).
#
sub supplement_ld_library_path
{
    my (@orig, @add_product, @add_solaris, %ldpath);

    # First, note the current LD_LIBRARY_PATH parts:

    my $dirname;
    if (defined($ENV{'LD_LIBRARY_PATH'})) {
        foreach $dirname (split(/:/, $ENV{'LD_LIBRARY_PATH'})) {
            if (! exists($ldpath{$dirname})) {
                push(@orig, $dirname);
                $ldpath{$dirname} = 1;
            }
        }
    }

    # Next, search for ELF shared objects.
    my ($dir, $path);

    if ($modify_ld_path) {
        while (defined($dir = next_dir_name())) {
            $path = dir_name_to_path($dir);

            $dirname = dirname($path);
            next if (exists($ldpath{$dirname}));

            #
            # A colon ":" in directory name is cannot be
            # accepted because that is the LD_LIBRARY_PATH
            # separator.
            #
            next if ($dirname =~ /:/);

            if (is_shared_object($path)) {
                if (! exists($ldpath{$dirname})) {
                    push(@add_product, $dirname);
                    $ldpath{$dirname} = 1;
                }
            }
        }
    }

    if ($append_solaris_dirs_to_ld_path) {
        foreach $dirname (split(/:/, $solaris_library_ld_path)) {
            if (! exists($ldpath{$dirname})) {
                push(@add_solaris, $dirname);
                $ldpath{$dirname} = 1;
            }
        }
    }

    # modify the LD_LIBRARY_PATH:
    if (@add_product || @add_solaris) {
        $ENV{'LD_LIBRARY_PATH'} =
            join(':', (@orig, @add_product, @add_solaris));
    }

    emsg("\n");
    if (@add_product) {
        emsg(gettext(
            "Shared libraries were found in the application and the\n" .
            "following directories are appended to LD_LIBRARY_PATH:\n"
            ) . "\n");

        foreach $dir (@add_product) {
            $dir = "./$dir" unless ($dir =~ m,^/,);
            emsg("   $dir\n");
        }
        emsg("\n");
    }

    if (@add_solaris) {
        emsg(gettext(
            "These Solaris library directories are being appended\n" .
            "to LD_LIBRARY_PATH:\n") . "\n");

        foreach $dir (@add_solaris) {
            emsg("   $dir\n");
        }
        emsg("\n");
    }
}

#
# Everything is correctly exported by now, and so we just run "symprof".
# It is run in batches of $block_size binaries to minimize the effect of
# memory usage caused by huge binaries in the product to be checked.
#
sub run_profiler
{
    my $block_size = 20;

    my $i = 0;

    # record old values of the blocks (if any)
    my $env_min = $ENV{'AC_BLOCK_MIN'};
    my $env_max = $ENV{'AC_BLOCK_MAX'};

    while ($i < $binary_count) { # do each block
        # export our symprof values of the block limits
        $ENV{'AC_BLOCK_MIN'} = $i;
        $ENV{'AC_BLOCK_MAX'} = $i + $block_size;

        run_symprof();

        $i += $block_size;
    }

    # restore old values of the blocks (if any)
    if (defined($env_min)) {
        $ENV{'AC_BLOCK_MIN'} = $env_min;
    } else {
        delete $ENV{'AC_BLOCK_MIN'};
    }
    if (defined($env_max)) {
        $ENV{'AC_BLOCK_MAX'} = $env_max;
    } else {
        delete $ENV{'AC_BLOCK_MAX'};
    }
}

#
# Sub that actually runs "symprof".
#
sub run_symprof
{
    system("$appcert_lib_dir/symprof");
    if ($? != 0) {
        emsg("%s", utilityfailed("symprof"));
        clean_up_exit(1);
    }
}

#
# Sub to run "symcheck".
#
sub run_checker
{
    system("$appcert_lib_dir/symcheck");
    if ($? != 0) {
        emsg("%s", utilityfailed("symcheck"));
        clean_up_exit(1);
    }
}

#
# Sub to run "symreport".
#
sub run_report_generator
{
    system("$appcert_lib_dir/symreport");
    if ($? != 0) {
        emsg("%s", utilityfailed("symreport"));
        clean_up_exit(1);
    }
}

#
# General routine to be called if one of our utility programs (symprof,
# symcheck, symreport) failed (that is, return != 0).  returns the
# formatted error message string to pass to the user.
#
sub utilityfailed
{
    my ($prog) = @_;
    my $fmt;
    $fmt = "\n *** " . gettext("utility program failed: %s\n");
    return sprintf($fmt, $prog);
}

#
# Does the cleanup and then exits with return code $rc.  The utility
# subroutine exiter() will call this subroutine.  No general cleanup is
# performed if exiting with error ($rc > 0) so that the user can examine
# at the output files, etc.
#
sub clean_up_exit
{
    my ($rc) = @_;

    if ($rc != 0) {
        working_dir_msg();
    } else {
        clean_up();
    }

    exit $rc;
}

#
# General cleanup routine.
#
sub clean_up
{
    if (-d $tmp_dir && ($tmp_dir !~ m,^/+$,)) {
        rmdir($tmp_dir);
    }
}

#
# Routine that is called when an error has occurred.  It indicates to
# user where the working and/or temporary directory is and that they are
# not being removed.
#
sub working_dir_msg
{

    my @dirlist;
    emsg("\n");
    if (defined($working_dir) && -d $working_dir) {
        push(@dirlist, $working_dir);
    }
    if (defined($tmp_dir) && -d $tmp_dir) {
        push(@dirlist, $tmp_dir);
    }

    return if (! @dirlist);

    emsg(gettext(
        "Note that the temporary working directories still exist:") .
        "\n\n");

    my $dir;
    # show the user explicitly which directories remains:
    foreach $dir (@dirlist) {
        system($cmd_ls, '-ld', $dir);
    }

    emsg("\n");
}

#
# Signal handler for interruptions (E.g. Ctrl-C SIGINT).
#
sub interrupted
{
    $SIG{$_[0]} = 'IGNORE';

    exit 1 if ($caught_signal);
    $caught_signal = 1;

    signals('off');
    emsg("\n** " . gettext("interrupted") . " **\n");

    clean_up_exit(1);
}