catalog.py revision 852
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
"""Interfaces and implementation for the Catalog object, as well as functions
that operate on lists of package FMRIs."""
import os
import re
import urllib
import errno
import datetime
import threading
import tempfile
import stat
import bisect
class CatalogException(Exception):
class RenameException(Exception):
"""Used to indicate the server catalog files do not have the expected
permissions."""
"""files should contain a list object with each entry consisting
of a tuple of filename, expected_mode, received_mode."""
if not files:
files = []
msg = _("The following catalog files have incorrect "
"permissions:\n")
msg += _("\t%(fname)s: expected mode: %(emode)s, found "
return msg
"""A Catalog is the representation of the package FMRIs available to
this client or repository. Both purposes utilize the same storage
format.
The serialized structure of the repository is an unordered list of
available package versions, followed by an unordered list of
incorporation relationships between packages. This latter section
allows the graph to be topologically sorted by the client.
S Last-Modified: [timespec]
XXX A authority mirror-uri ...
XXX ...
V fmri
V fmri
...
C fmri
C fmri
...
I fmri fmri
I fmri fmri
...
In order to improve the time to search the catalog, a cached list
of package names is kept in the catalog instance. In an effort
to prevent the catalog from having to generate this list every time
it is constructed, the array that contains the names is pickled and
saved as pkg_names.pkl.
"""
# The file mode to be used for all catalog files.
# XXX Mirroring records also need to be allowed from client
# configuration, and not just catalogs.
#
# XXX It would be nice to include available tags and package sizes,
# although this could also be calculated from the set of manifests.
#
# XXX Current code is O(N_packages) O(M_versions), should be
# O(1) O(M_versions), and possibly O(1) O(1).
#
# XXX Initial estimates suggest that the Catalog could be composed of
# 1e5 - 1e7 lines. Catalogs across these magnitudes will need to be
# spread out into chunks, and may require a delta-oriented update
# interface.
"""Create a catalog. If the path supplied does not exist,
this will create the required directory structure.
Otherwise, if the directories are already in place, the
existing catalog is opened. If pkg_root is specified
and no catalog is found at cat_root, the catalog will be
rebuilt. authority names the authority that
is represented by this catalog."""
# The catalog protects the catalog file from having multiple
# threads writing to it at the same time.
# Rebuild catalog, if we're the depot and it's necessary.
def __set_perms(self):
"""Sets permissions on catalog files if not read_only and if the
current user can do so; raises CatalogPermissionsException if
the permissions are wrong and cannot be corrected."""
"attrs"))
"catalog"))
# Force file_mode, so that unprivileged users can read these.
bad_modes = []
try:
"%o" % fmode))
else:
except EnvironmentError, e:
# If the files don't exist yet, move on.
continue
# If the mode change failed for another reason,
# check to see if we actually needed to change
# it, and if so, add it to bad_modes.
"%o" % fmode))
if bad_modes:
"""Add a package, named by the fmri, to the catalog.
Throws an exception if an identical package is already
present. Throws an exception if package has no version."""
raise CatalogException, \
"Unversioned FMRI not supported: %s" % pfmri
# Callers should verify that the FMRI they're going to add is
# valid; however, this check is here in case they're
# lackadaisical
raise CatalogException, \
"Existing renames make adding FMRI %s invalid." \
% pfmri
if critical:
else:
try:
except IOError, e:
# Creating an empty file
else:
raise
try:
raise CatalogException(
"Package %s is already in the "
"catalog" % pfmri)
else:
finally:
# Catalog size has changed, force recalculation on
# next send()
return ts
"""Store the fmri in a data structure 'd' for fast lookup.
'd' is a dict that maps each package name to another dictionary,
itself mapping each version string to a tuple of the fmri object
and a list of authorities from which the package version is
available, as well as a special key, "versions", which maps to a
list of version objects, kept in sorted order.
pkg_name1: {
"versions": [ <version1>, <version2>, <version3>, ... ],
"version1": ( <fmri1>, [ "auth1", "auth2", ... ] ),
"version2": ( <fmri2>, [ "auth1", "auth2", ... ] ),
"version3": ( <fmri3>, [ "auth1", "auth2", ... ] ),
...
},
pkg_name2: {
...
},
...
(where names in quotes are strings, names in angle brackets are
objects, and the rest of the syntax is Pythonic.)
The fmri is expected not to have an embedded authority. If it
does, it will be ignored."""
# This is the simplest representation of the cache data
# structure.
}
else:
"""Read the catalog file in "path" and combine it with the
existing data in "catalog"."""
continue
def added_prefix(self, p):
    """Hook invoked when prefix p, which this version of the
    catalog has only just learned how to handle, is found in
    the catalog.

    Any transformation that entries with this prefix require
    (for example, converting an entry from server form to
    client form) belongs here.  No transformation is defined
    at present, so this does nothing and returns None."""

    # No prefix currently needs a transformation.
    return None
def attrs_as_lines(self):
"""Takes the list of in-memory attributes and returns
a list of strings, each string naming an attribute."""
ret = []
s = "S %s: %s\n" % (k, v)
return ret
"""Helper method that takes the full path to the package
directory and the name of the manifest file, and returns an FMRI
constructed from the information in those components."""
f.version = v
return f
def check_prefix(self):
"""If this version of the catalog knows about new prefixes,
check the on disk catalog to see if we can perform any
transformations based upon previously unknown catalog formats.
This routine will add a catalog attribute if it doesn't exist,
otherwise it checks this attribute against a hard-coded
version-specific tuple to see if new methods were added.
If new methods were added, it will call an additional routine
that updates the on-disk catalog, if necessary."""
# If a prefixes attribute doesn't exist, write one and get on
# with it.
return
# Prefixes attribute does exist. Check if it has changed.
# Nothing to do if prefixes haven't changed
if pfx_set == known_prefixes:
return
# If known_prefixes contains a prefix not in pfx_set,
# add the prefix and perform a catalog transform.
if new:
for p in new:
self.added_prefix(p)
# Write out updated prefixes list
def build_catalog(self):
"""Walk the on-disk package data and build (or rebuild) the
package catalog and search database."""
try:
except OSError, e:
raise
cat_mtime = 0
# XXX eschew os.walk in favor of another os.listdir here?
continue
# XXX force a rebuild despite mtimes?
# XXX queue this and fork later?
print f
# XXX Now this is only used by rename_package() and a handful of tests.
"""Wrapper for extract_matching_fmris."""
return []
"""A generator function that produces FMRIs as it
iterates over the contents of the catalog."""
try:
except IOError, e:
return
else:
raise
continue
try:
continue
if pkg == "pkg":
(cat_name, cat_version),
except ValueError:
# Handle old two-column catalog file, mostly in
# use on server. If *this* doesn't work, we
# have a corrupt catalog.
try:
except ValueError:
raise RuntimeError, \
"corrupt catalog entry for " \
"authority '%s': %s" % \
"""Returns a list of RenameRecords where fmri is listed as the
destination package."""
# Don't bother doing this if no FMRI is present
if not pfmri:
return
# Load renamed packages, if needed
yield rr
"""Returns a list of RenameRecords where fmri is listed as
the source package."""
# Don't bother doing this if no FMRI is present
if not pfmri:
return
# Load renamed packages, if needed
yield rr
def last_modified(self):
"""Return the time at which the catalog was last modified."""
"""Load attributes from the catalog file into the in-memory
attributes dictionary"""
return
if m != None:
# convert npkgs to integer value
def _load_renamed(self):
"""Load the catalog's rename records into self.renamed"""
try:
except IOError, e:
return
else:
raise
]
"""Returns the number of packages in the catalog."""
"""Returns the URL of the catalog's origin."""
"""A static method that takes a file-like object and
a path. This is the other half of catalog.send(). It
reads a stream as an incoming catalog and lays it down
on disk. Content_size is the size in bytes, if known,
of the transfer that is being received. The default
value of -1 means that the size is not known."""
size = 0
bad_fmri = None
for s in filep:
# If line is too short, process the next one
if slen < 2:
continue
# check that line is in the proper format
elif not s[1].isspace():
continue
elif not s[0] in known_prefixes:
elif s.startswith("S "):
elif s.startswith("R "):
else:
# XXX Need to be able to handle old and new
# format catalogs.
try:
except fmri.IllegalFmri, e:
bad_fmri = e
continue
# Check that content was properly received before
# modifying any files.
url = None
# If we got a parse error on FMRIs and transfer
# wasn't truncated, raise a FmriFailures error.
elif bad_fmri:
raise bad_fmri
# Write the authority's origin into our attributes
if auth:
# Mkstemp sets mode 600 on these files by default.
# Restore them to 644, so that unprivileged users
# may read these files.
"""Record that the name of package oldname has been changed
to newname as of version vers. Returns a timestamp
of when the catalog was modified and a RenamedPackage
object that describes the rename."""
# Check that the destination (new) package is already in the
# catalog. Also check that the old package does not exist at
# the version that is being renamed.
raise CatalogException, \
"Destination FMRI %s must be in catalog" % \
raise CatalogException, \
"Src FMRI %s must not be in catalog" % \
# Load renamed packages, if needed
# Check that rename record isn't already in catalog
raise CatalogException, \
"Rename %s is already in the catalog" % rr
# Keep renames acyclic. Check that the destination of this
# rename isn't the source of another rename.
raise RenameException, \
"Can't rename %s. Causes cycle in rename graph." \
"catalog"))
# Recalculate size on next send()
"""Returns true if sfmri and pfmri are the same package because
of a rename operation."""
return True
elif s.new_fmri() and \
return True
return True
return True
return False
"""Returns true if sfmri is a successor to pfmri by way
of a rename operation."""
return True
else:
return False
"""Returns true if sfmri is a predecessor to pfmri by
a rename operation."""
return True
elif s.new_fmri():
return False
"""Returns a list of packages that are newer than pfmri."""
pkgs = []
if s.new_fmri():
return pkgs
"""Returns a list of packages that are older than fmri."""
pkgs = []
return pkgs
"""Save attributes from the in-memory catalog to a file
specified by filenm."""
try:
except IOError, e:
# This may get called in a situation where
# the user does not have write access to the attrs
# file.
return
else:
raise
# Recalculate size on next send()
"""Send the contents of this catalog out to the filep
specified as an argument."""
if rspobj is not None:
def output():
# Send attributes first.
yield line
try:
"r")
except IOError, e:
# Missing catalog is fine; other errors need to
# be reported.
return
else:
raise
for e in cfile:
yield e
if filep:
else:
return output()
"""Set time to timestamp if supplied by caller. Otherwise
use the system time."""
else:
"""Return the size in bytes of the catalog and attributes."""
try:
except OSError, e:
attr_sz = 0
else:
raise
try:
except OSError, e:
cat_sz = 0
else:
raise
"""Check that the fmri supplied as an argument would be
valid to add to the catalog. This checks to make sure that
existing rename or freeze records would not prohibit the caller
from adding this FMRI."""
return False
return False
return True
# In order to avoid a fine from the Department of Redundancy Department,
# allow these methods to be invoked without explicitly naming the Catalog class.
# Prefixes that this catalog knows how to handle
# Method used by Catalog and UpdateLog. Since UpdateLog needs to know
# about Catalog, keep it in Catalog to avoid circular dependency problems.
def timestamp():
"""Return an integer timestamp that can be used for comparisons."""
return tstr
def ts_to_datetime(ts):
"""Take timestamp ts in string isoformat, and convert it to a datetime
object."""
# usec is not in the string if 0
try:
except ValueError:
usec = 0
return dt
"""Iterate through the given list of PkgFmri objects,
looking for packages matching 'pattern' in 'patterns', based on the
function in 'matcher' and the versioning constraint described by
'constraint'. If 'matcher' is None, uses fmri subset matching
as the default. If 'patterns' is None, 'versions' may be specified,
and looks for packages matching the patterns specified in 'versions'.
When using 'versions', the 'constraint' parameter is ignored.
'versions' should be a list of strings of the format:
release,build_release-branch:datetime
...with a value of '*' provided for any component to be ignored. '*' or
'?' may be used within each component value and will act as wildcard
characters ('*' for one or more characters, '?' for a single character).
Returns a sorted list of PkgFmri objects, newest versions first. If
'counthash' is a dictionary, instead store the number of matched fmris
for each package that matches."""
if not matcher:
if patterns is None:
patterns = []
if versions is None:
versions = []
else:
# 'pattern' may be a partially or fully decorated fmri; we want
# to extract its name and version to match separately against
# the catalog.
# XXX "5.11" here needs to be saner
tuples = {}
else:
assert pattern != None
def by_pattern(p):
if not pat_version or \
pat_version, constraint) or \
p.version == pat_version:
if counthash is not None:
else:
if pat_auth:
return p
def by_version(p):
if counthash is not None:
else:
return p
ret = []
if patterns:
for p in pkgs:
res = by_pattern(p)
if res is not None:
elif versions:
for p in pkgs:
res = by_version(p)
if res is not None:
class RenamedPackage(object):
"""An in-memory representation of a rename object. This object records
information about a package that has had its name changed.
Renaming a package presents a number of challenges. The packaging
system must still be able to recognize and decode dependencies on
packages with the old name. In order for this to work correctly, the
rename record must contain both the old and new name of the package. It
is also undesirable to have a renamed package receive subsequent
versions. However, it still should be possible to publish bugfixes to
the old package lineage. This means that we must also record
versioning information at the time a package is renamed.
This versioning information allows us to determine which portions
of the version and namespace are allowed to add new versions.
If a package is re-named to the NULL package at a specific version,
this is equivalent to freezing the package. No further updates to
the version history may be made under that name. (NULL is never open)
The rename catalog format is as follows:
R <srcname> <srcversion> <destname> <destversion>
"""
"""Create a RenamedPackage object. Srcname is the original
name of the package, destname is the name this package
will take after the operation is successful.
Versionstr is the version at which this change takes place. No
versions >= version of srcname will be permitted."""
if destname == "NULL":
destversion = None
else:
if not srcversion and not destversion:
raise RenameException, \
"Must supply a source or destination version"
elif not srcversion:
elif not destversion:
else:
"""Implementing our own == function allows us to properly
check whether a rename object is in a list of renamed
objects."""
return False
return False
return False
return False
return False
return True
"""Return a FMRI that represents the destination name and
version of the renamed package."""
return None
return fm
"""Return a FMRI that represents the most recent version
of the package had it not been renamed."""
return fm