validate_pkg.py revision 3be32116258babbbdd231a5bba8e6e58e87daa99
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Compare the content generated by a build to a set of manifests
# describing how that content is to be delivered.
#
import getopt
import os
import stat
import sys
#
# Dictionary used to map action names to output format. Each entry is
# indexed by action name, and consists of a list of tuples that map
# FileInfo class members to output labels.
#
OUTPUTMAP = {
    # Directories: ownership and permission attributes, then the path.
    "dir": [
        ("group", "group="),
        ("mode", "mode="),
        ("owner", "owner="),
        ("path", "path="),
    ],
    # Files: same attributes as directories, preceded by the hash,
    # which carries an empty label.
    "file": [
        ("hash", ""),
        ("group", "group="),
        ("mode", "mode="),
        ("owner", "owner="),
        ("path", "path="),
    ],
    # Symbolic links: mediator, path, and link target.
    "link": [
        ("mediator", "mediator="),
        ("path", "path="),
        ("target", "target="),
    ],
    # Hard links: the "hardkey" member is emitted under the "target="
    # label.
    "hardlink": [
        ("path", "path="),
        ("hardkey", "target="),
    ],
}
# Mode checks used to validate safe file and directory permissions
"""Base class to represent a file.
Subclassed according to whether the file represents an actual filesystem
object (RealFileInfo) or an IPS manifest action (ActionInfo).
"""
"""Return the IPS action name of a FileInfo object.
"""
return "dir"
return "link"
return "hardlink"
return "file"
"""Check for and report on unsafe permissions.
Returns a potentially empty list of warning strings.
"""
w = []
if t in ("link", "hardlink"):
return w
"readable by group or other" % (p, m)])
((t == "file" and not e or t == "dir" and o == "bin") and
w.extend(["%s: owner \"%s\" may be safely " \
"changed to \"root\"" % (p, o)])
w.extend(["%s: non-root-owned executable should not " \
"also be writable by owner." % p])
if ("m" in modechecks and
w.extend(["%s: 0%o: should not be writable by group or other" %
(p, m)])
return w
"""Compare two FileInfo objects.
Note this is the "not equal" comparison, so a return value of False
indicates that the objects are functionally equivalent.
"""
#
# Map the objects such that the lhs is always the ActionInfo,
# and the rhs is always the RealFileInfo.
#
# It's only really important that the rhs not be an
# ActionInfo; if we're comparing a FileInfo to a RealFileInfo, it
# won't actually matter what we choose.
#
else:
#
# Because the manifest may legitimately translate a relative
# path from the proto area into a different path on the installed
# system, we don't compare paths here. We only expect this comparison
# to be invoked on items with identical relative paths in
# the first place.
#
#
# All comparisons depend on type. For symlink and directory, they
# must be the same. For file and hardlink, see below.
#
return True
#
# For symlinks, all that's left is the link target.
# For mediated symlinks, targets can differ.
#
if typelhs == "link":
#
# For a directory, it's important that both be directories,
# the modes be identical, and the paths are identical. We already
# checked all but the modes above.
#
# If both objects are files, then we're in the same boat.
#
#
# For files or hardlinks:
#
# Since the key space is different (inodes for real files and
# actual link targets for hard links), and since the proto area will
# identify all N occurrences as hardlinks, but the manifests as one
# file and N-1 hardlinks, we have to compare files to hardlinks.
#
#
# If they're both hardlinks, we just make sure that
# the same target path appears in both sets of
# possible targets.
#
#
# Otherwise, we have a mix of file and hardlink, so we
# need to make sure that the file path appears in the
# set of possible target paths for the hardlink.
#
# We already know that the ActionInfo, if present, is the lhs
# operand. So it's the rhs operand that's guaranteed to
# have a set of hardpaths.
#
"""Return an action-style representation of a FileInfo object.
We don't currently quote items with embedded spaces. If we
ever decide to parse this output, we'll want to revisit that.
"""
return out
"""Return a protolist-style representation of a FileInfo object.
"""
target = "-"
major = "-"
minor = "-"
if name == "dir":
ftype = "d"
ftype = "f"
elif name == "link":
ftype = "s"
mode = "777"
owner = "root"
group = "other"
out = "%c %-30s %-20s %4s %-5s %-5s %6d %2ld - -" % \
return out
class ActionInfo(FileInfo):
"""Object to track information about manifest actions.
This currently understands file, link, dir, and hardlink actions.
"""
#
# Currently, all actions that we support have a "path"
# attribute. If that changes, then we'll need to
# catch a KeyError from this assignment.
#
"""Indicates whether the specified IPS action type is
correctly handled by the ActionInfo constructor.
"""
class UnsupportedFileFormatError(Exception):
"""This means that the stat.S_IFMT returned something we don't
support, e.g. a pipe or socket. If it's appropriate for such an
object to be in the proto area, then the RealFileInfo constructor
will need to evolve to support it, or it will need to be in the
exception list.
"""
class RealFileInfo(FileInfo):
"""Object to track important-to-packaging file information.
This currently handles regular files, directories, and symbolic links.
For multiple RealFileInfo objects with identical hardkeys, there
is no way to determine which of the hard links should be
delivered as a file, and which as hardlinks.
"""
#
# Per stat.py, these cases are mutually exclusive.
#
else:
#
# Instead of reading the group and owner from the proto area after
# a non-root build, just drop in dummy values. Since we don't
# compare them anywhere, this should allow at least marginally
# useful comparisons of protolist-style output.
#
#
# refcount > 1 indicates a hard link
#
#
# This could get ugly if multiple proto areas reside
# on different filesystems.
#
class DirectoryTree(dict):
"""Meant to be subclassed according to population method.
"""
"""Compare two different sets of FileInfo objects.
"""
if onlykeys1:
print "Entries present in %s but not %s:" % \
print ""
if onlykeys2:
print "Entries present in %s but not %s:" % \
print ""
if nodifferences:
print "Entries that differ between %s and %s:" \
if not nodifferences:
print ""
class BadProtolistFormat(Exception):
"""This means that the user supplied a file via -l, but at least
one line from that file doesn't have the right number of fields to
parse as protolist output.
"""
class ProtoTree(DirectoryTree):
"""Describes one or more proto directories as a dictionary of
RealFileInfo objects, indexed by relative path.
"""
"""Extends the ProtoTree dictionary with RealFileInfo
objects describing the proto dir, indexed by relative
path.
"""
newentries = {}
if path not in exceptions:
try:
except OSError, e:
(path, e))
continue
else:
#
# Find the sets of paths in this proto dir that are hardlinks
# to the same inode.
#
# It seems wasteful to store this in each FileInfo, but we
# otherwise need a linking mechanism. With this information
# here, FileInfo object comparison can be self contained.
#
# We limit this aggregation to a single proto dir, as
# represented by newentries. That means we don't need to care
# about proto dirs on separate filesystems, or about hardlinks
# that cross proto dir boundaries.
#
hk2path = {}
"""Read in the specified file, assumed to be the
output of protolist.
This has been tested minimally, and is potentially useful for
comparing across the transition period, but should ultimately
go away.
"""
try:
newentries = {}
#
# Use a FileInfo() object instead of a RealFileInfo()
# object because we want to avoid the RealFileInfo
# constructor, because there's nothing to actually stat().
#
try:
continue
except IndexError:
raise BadProtolistFormat(pline)
hk2path = {}
class ManifestParsingError(Exception):
"""This means that the Manifest.set_content() raised an
ActionError. We raise this, instead, to tell us which manifest
could not be parsed, rather than what action error we hit.
"""
class ManifestTree(DirectoryTree):
"""Describes one or more directories containing arbitrarily
many manifests as a dictionary of ActionInfo objects, indexed
by the relative path of the data source within the proto area.
That path may or may not be the same as the path attribute of the
given action.
"""
"""Treats the specified input file as a pkg(5) package
manifest, and extends the ManifestTree dictionary with entries
for the actions therein.
"""
try:
#
# Make sure the manifest is applicable to the user-specified
# architecture. Assumption: if variant.arch is not an
# attribute of the manifest, then the package should be
# installed on all architectures.
#
return
modewarnings = set()
continue
#
# The dir action is currently fully specified, in that it
# lists owner, group, and mode attributes. If that
# changes in pkg(5) code, we'll need to revisit either this
# code or the ActionInfo() constructor. It's possible
# that the pkg(5) system could be extended to provide a
# mechanism for specifying directory permissions outside
# of the individual manifests that deliver files into
# those directories. Doing so at time of manifest
# processing would mean that validate_pkg continues to work,
# but doing so at time of publication would require updates.
#
#
# See pkgsend(1) for the use of NOHASH for objects with
# datastreams. Currently, that means "files," but this
# should work for any other such actions.
#
else:
#
# This is the wrong tool in which to enforce consistency
# on a set of manifests. So instead of comparing the
# different actions with the same "path" attribute, we
# use the first one.
#
continue
#
# As with the manifest itself, if an action has specified
# variant.arch, we look for the target architecture
# therein.
#
var = None
#
# The name of this method changed in pkg(5) build 150, so we need to
# work with both the old and the new names.
#
else:
return
print "warning: unsafe permissions in %s" % mfile
for w in sorted(modewarnings):
print w
print ""
"""Walks the specified directory looking for pkg(5) manifests.
"""
try:
def resolvehardlinks(self):
"""Populates mode, group, and owner for resolved (i.e. the link target
is present in the manifest tree) hard links.
"""
class ExceptionList(set):
"""Keep track of an exception list as a set of paths to be excluded
from any other lists we build.
"""
try:
"""Build a list of all pathnames from the specified file that
either apply to all architectures (i.e. which have no trailing
architecture tokens), or to the specified architecture (i.e.
which have the value of the arch arg as a trailing
architecture token.)
"""
USAGE = """%s [-v] -a arch [-e exceptionfile]... [-L|-M [-X check]...] input_1 [input_2]
where input_1 and input_2 may specify proto lists, proto areas,
or manifest directories. For proto lists, use one or more
-l file
arguments. For proto areas, use one or more
-p dir
arguments. For manifest directories, use one or more
-m dir
arguments.
If -L or -M is specified, then only one input source is allowed, and
it should be one or more manifest directories. These two options are
mutually exclusive.
The -L option is used to generate a proto list to stdout.
The -M option is used to check for safe file and directory modes.
By default, this causes all mode checks to be performed. Individual
mode checks may be turned off using "-X check," where "check" comes
from the following set of checks:
m check for group or other write permissions
w check for user write permissions on files and directories
not owned by root
o check for files that could be safely owned by root
"""Try to give the user useful information when they don't get the
command syntax right.
"""
if msg:
"""Compares two out of three possible data sources: a proto list, a
set of proto areas, and a set of manifests.
"""
try:
if args:
usage()
arch = None
exceptionlists = []
manifestdirs = []
protodirs = []
protolists = []
modechecks = set()
togglemodechecks = set()
trees = []
if opt == "-a":
if arch:
usage("may only specify one architecture")
else:
elif opt == "-e":
elif opt == "-L":
elif opt == "-l":
elif opt == "-M":
elif opt == "-m":
elif opt == "-p":
elif opt == "-v":
elif opt == "-X":
usage("-L and -M require one or more -m args, and no -l or -p")
usage("-L and -M are mutually exclusive")
usage("must specify exactly two of -l, -m, and -p")
usage("-X requires -M")
for s in togglemodechecks:
if s not in ALLMODECHECKS:
usage("unknown mode check %s" % s)
modechecks = None
if not arch:
usage("must specify architecture")
for mdir in manifestdirs:
if listonly:
if modechecks is not None:
for plist in protolists:
try:
if verbose and exceptions:
print "Entries present in exception list but missing from proto area:"
print "\t%s" % exc
print ""
print "Entries present in exception list but also in manifests:"
print "\t%s" % exc
del manifesttree[exc]
print ""
if __name__ == '__main__':
try:
except KeyboardInterrupt:
except IOError: