# catalog.py revision 265
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
"""Interfaces and implementation for the Catalog object, as well as functions
that operate on lists of package FMRIs."""
import os
import re
import urllib
import errno
import dbm
import signal
import threading
import datetime
import sys
class CatalogException(Exception):
class RenameException(Exception):
"""A Catalog is the representation of the package FMRIs available to
this client or repository. Both purposes utilize the same storage
format.
The serialized structure of the repository is an unordered list of
available package versions, followed by an unordered list of
incorporation relationships between packages. This latter section
allows the graph to be topologically sorted by the client.
S Last-Modified: [timespec]
XXX A authority mirror-uri ...
XXX ...
V fmri
V fmri
...
C fmri
C fmri
...
I fmri fmri
I fmri fmri
...
"""
# XXX Mirroring records also need to be allowed from client
# configuration, and not just catalogs.
#
# XXX It would be nice to include available tags and package sizes,
# although this could also be calculated from the set of manifests.
#
# XXX Current code is O(N_packages) O(M_versions), should be
# O(1) O(M_versions), and possibly O(1) O(1).
#
# XXX Initial estimates suggest that the Catalog could be composed of
# 1e5 - 1e7 lines. Catalogs across these magnitudes will need to be
# spread out into chunks, and may require a delta-oriented update
# interface.
"""Create a catalog. If the path supplied does not exist,
this will create the required directory structure.
Otherwise, if the directories are already in place, the
existing catalog is opened. If pkg_root is specified
and no catalog is found at cat_root, the catalog will be
rebuilt. authority names the authority that
is represented by this catalog."""
self.searchdb_update_handle = None
# We need to lock the search database against multiple
# simultaneous updates from separate threads closing
# publication transactions.
"/search"
if pkg_root is not None:
"""Add a package, named by the fmri, to the catalog.
Throws an exception if an identical package is already
present. Throws an exception if package has no version."""
raise CatalogException, \
"Unversioned FMRI not supported: %s" % fmri
# Callers should verify that the FMRI they're going to add is
# valid; however, this check is here in case they're
# lackadaisical
raise CatalogException, \
"Existing renames make adding FMRI %s invalid." \
% fmri
if critical:
else:
"catalog"))
raise CatalogException, \
"Package %s is already in the catalog" % \
return ts
def added_prefix(self, p):
    """Perform any catalog transformations necessary if
    prefix p is found in the catalog. Previously, we didn't
    know how to handle this prefix and now we do. If an entry
    needs to be transformed from server to client form, that
    happens here.

    No prefix currently requires a transformation, so this is a
    no-op that returns None."""

    # Nothing to do now.
    return None
def attrs_as_lines(self):
"""Takes the list of in-memory attributes and returns
a list of strings, each string naming an attribute."""
ret = []
s = "S %s: %s\n" % (k, v)
return ret
"""Helper method that takes the full path to the package
directory and the name of the manifest file, and returns an FMRI
constructed from the information in those components."""
f.version = v
return f
def check_prefix(self):
"""If this version of the catalog knows about new prefixes,
check the on disk catalog to see if we can perform any
transformations based upon previously unknown catalog formats.
This routine will add a catalog attribute if it doesn't exist,
otherwise it checks this attribute against a hard-coded
version-specific tuple to see if new methods were added.
If new methods were added, it will call an additional routine
that updates the on-disk catalog, if necessary."""
# If a prefixes attribute doesn't exist, write one and get on
# with it.
return
# Prefixes attribute does exist. Check if it has changed.
# Nothing to do if prefixes haven't changed
if pfx_set == known_prefixes:
return
# If known_prefixes contains a prefix not in pfx_set,
# add the prefix and perform a catalog transform.
if new:
for p in new:
self.added_prefix(p)
# Write out updated prefixes list
def build_catalog(self):
"""Walk the on-disk package data and build (or rebuild) the
package catalog and search database."""
try:
idx_mtime = \
except OSError, e:
raise
idx_mtime = 0
try:
except OSError, e:
raise
cat_mtime = 0
fmri_list = []
# XXX eschew os.walk in favor of another os.listdir here?
continue
# XXX force a rebuild despite mtimes?
# XXX queue this and fork later?
print f
# XXX force a rebuild despite mtimes?
# If the database doesn't exist, don't bother
# building the list; we'll just build it all.
# If we have no updates to make to the search database but it
# already exists, just make it available. If we do have updates
# to make (including possibly building it from scratch), fork it
# off into another process; when that's done, we'll mark it
# available.
try:
"Failed to open search database", \
"for writing: %s (errno=%s)" % \
try:
"Failed to open search " + \
"database: %s (errno=%s)" % \
else:
"""Handler method for the SIGCLD signal. Checks to see if the
search database update child has finished, and enables searching
if it finished successfully, or logs an error if it didn't."""
if not self.searchdb_update_handle:
return
if rc == 0:
try:
"Failed to open search database", \
"for writing: %s (errno=%s)" % \
try:
self.searchdb_update_handle = None
return
"Failed to open search " + \
"database: %s (errno=%s)" % \
return
elif rc > 0:
# XXX This should be logged instead
print "ERROR building search database:"
if fmri_list:
try:
# Since we're here explicitly to update
# the database, if we fail, there's
# nothing more to do.
"Failed to open search database", \
"for writing: %s (errno=%s)" % \
return 1
else:
# new = True
try:
"Failed to open search database", \
"for writing: %s (errno=%s)" % \
return 1
# XXX We should probably iterate over the catalog, for
# cases where manifests have stuck around, but have been
# moved to historical and removed from the catalog.
fmri_list = (
)
"""Update the search database with the FMRIs passed in via
'fmri_list'. If 'fmri_list' is empty or None, then rebuild the
database from scratch. 'fmri_list' should be a list of tuples
where the first element is the full path to the package name in
pkg_root and the second element is the version string."""
# If we're in the process of updating the database in our
# separate process, defer this particular update until that's
# done.
return
try:
finally:
# If we rebuilt the database from scratch ... XXX why would we
# want to do this?
# if new:
# self.searchdb.close()
# self.searchdb = None
# Five digits of a base-62 number represent a little over 900 million.
# Assuming 1 million tokens used in a WOS build (current imports use
# just short of 500k, but we don't have all the l10n packages, and may
# not have all the search tokens we want) and keeping every nightly
# build gives us 2.5 years before we run out of token space. We're
# likely to garbage collect manifests and rebuild the db before then.
#
# XXX We're eventually going to run into conflicts with real tokens
# here. This is unlikely until we hit, say "alias", which is a ways
# off, but we should still look at solving this.
idx_tok_len = 5
def next_token(self):
alphabet = "abcdefghijklmnopqrstuvwxyz"
s = ""
s = k[idx] + s
# XXX Do we want to log warnings as we approach index capacity?
return s
"""Update the search database with the data from the manifest
for 'fmri', which has been collected into 'search_dict'"""
# self.searchdb: token -> (type, fmri, action)
# XXX search_dict doesn't have action info, but should
# Don't update the database if it already has this FMRI's
# indices.
return
# XXX The database files are so damned huge (if
# holey) because we have zillions of copies of
# the full fmri strings. We might want to
# indirect these as well.
"""Because of the size limitations of the underlying database
records, not only do we have to store pointers to the actual
search data, but once the pointer records fill up, we have to
chain those records up to spillover records. This method adds
the pointer to the data to the end of the last link in the
chain, overflowing as necessary. The search token is passed in
as 'token', and the pointer to the actual data which should be
returned is passed in as 'data_token'."""
while True:
try:
except KeyError:
cur = ""
# According to the ndbm man page, the total length of
# key and value must be less than 1024. Seems like the
# actual value is 1018, probably due to some padding or
# accounting bytes or something. The 2 is for the space
# separator and the plus-sign for the extension token.
# XXX The comparison should be against 1017, but that
# crashes in the if clause below trying to append the
# extension token. Dunno why.
# If we're adding the first element in the next
# link of the chain, add the extension token to
# the end of this link, and put the token
# pointing to the data at the beginning of the
# next link.
break # from while True; we're done
# If we find an extension token, start looking
# at the next chain link.
else:
continue
# If we get here, it's safe to append the data token to
# the current link, and get out.
if cur:
else:
break
"""Search through the search database for 'token'. Return a
list of token type / fmri pairs."""
ret = []
while True:
# For each indirect token in the search token's value,
# add its value to the return list. If we see a chain
# token, switch to its value and continue. If we fall
# out of the loop without seeing a chain token, we can
# return.
break
else:
else:
return ret
constraint = None, counthash = None):
"""Iterate through the catalog, looking for packages matching
'pattern', based on the function in 'matcher' and the versioning
constraint described by 'constraint'. If 'matcher' is None,
uses fmri subset matching as the default. Returns a sorted list
of PkgFmri objects, newest versions first. If 'counthash' is a
dictionary, instead store the number of matched fmris for each
package name which was matched."""
tuples = {}
# 'patterns' may be partially or fully decorated fmris; we want
# to extract their names and versions to match separately
# against the catalog.
#
# XXX "5.11" here needs to be saner
else:
pkgs = []
try:
except IOError, e:
return pkgs
else:
raise
continue
try:
continue
if pkg != "pkg":
continue
except ValueError:
# Handle old two-column catalog file, mostly in
# use on server.
continue
"""A generator function that produces FMRIs as it
iterates over the contents of the catalog."""
try:
except IOError, e:
return
else:
raise
continue
try:
continue
if pkg == "pkg":
(cat_name, cat_version),
except ValueError:
# Handle old two-column catalog file, mostly in
# use on server.
"""Returns a list of RenameRecords where fmri is listed as the
destination package."""
# Don't bother doing this if no FMRI is present
if not fmri:
return
# Load renamed packages, if needed
yield rr
"""Returns a list of RenameRecords where fmri is listed as
the source package."""
# Don't bother doing this if no FMRI is present
if not fmri:
return
# Load renamed packages, if needed
yield rr
def last_modified(self):
"""Return the time at which the catalog was last modified."""
"""Load attributes from the catalog file into the in-memory
attributes dictionary"""
return
if m != None:
# convert npkgs to integer value
def _load_renamed(self):
"""Load the catalog's rename records into self.renamed"""
try:
except IOError, e:
return
else:
raise
]
"""Returns the number of packages in the catalog."""
"""A static method that takes a file-like object and
a path. This is the other half of catalog.send(). It
reads a stream as an incoming catalog and lays it down
on disk."""
for s in filep:
if not s[1].isspace():
continue
elif not s[0] in known_prefixes:
elif s.startswith("S "):
elif s.startswith("R "):
else:
# XXX Need to be able to handle old and new
# format catalogs.
"""Record that the name of package oldname has been changed
to newname as of version vers. Returns a timestamp
of when the catalog was modified and a RenamedPackage
object that describes the rename."""
# Check that the destination (new) package is already in the
# catalog. Also check that the old package does not exist at
# the version that is being renamed.
raise CatalogException, \
"Destination FMRI %s must be in catalog" % \
raise CatalogException, \
"Src FMRI %s must not be in catalog" % \
# Load renamed packages, if needed
# Check that rename record isn't already in catalog
raise CatalogException, \
"Rename %s is already in the catalog" % rr
# Keep renames acyclic. Check that the destination of this
# rename isn't the source of another rename.
raise RenameException, \
"Can't rename %s. Causes cycle in rename graph." \
"catalog"))
"""Returns true if fmri and pfmri are the same package because
of a rename operation."""
return True
elif s.new_fmri() and \
return True
return True
return True
return False
"""Returns true if fmri is a successor to pfmri by way
of a rename operation."""
return True
else:
return False
"""Returns true if fmri is a predecessor to pfmri by
a rename operation."""
return True
elif s.new_fmri():
return False
"""Returns a list of packages that are newer than fmri."""
pkgs = []
if s.new_fmri():
return pkgs
"""Returns a list of packages that are older than fmri."""
pkgs = []
return pkgs
"""Save attributes from the in-memory catalog to a file
specified by filenm."""
"""Send the contents of this catalog out to the filep
specified as an argument."""
# Send attributes first.
try:
except IOError, e:
# Missing catalog is fine; other errors need to be
# reported.
return
else:
raise
for e in cfile:
"""Set time to timestamp if supplied by caller. Otherwise
use the system time."""
else:
def search_available(self):
    """Return the current value of this object's
    _search_available attribute."""

    # NOTE(review): presumably a flag that is true once the search
    # database can be queried -- confirm against the code that sets it.
    return self._search_available
"""Check that the fmri supplied as an argument would be
valid to add to the catalog. This checks to make sure that
existing renames would not prohibit the caller from adding
this FMRI."""
return False
return True
# In order to avoid a fine from the Department of Redundancy Department,
# allow these methods to be invoked without explicitly naming the Catalog class.
# Prefixes that this catalog knows how to handle
# Method used by Catalog and UpdateLog. Since UpdateLog needs to know
# about Catalog, keep it in Catalog to avoid circular dependency problems.
def timestamp():
"""Return an integer timestamp that can be used for comparisons."""
# NOTE(review): the value returned is named 'tstr', and ts_to_datetime()
# describes timestamps as isoformat strings, so "integer" in the docstring
# above may be stale -- confirm against the code that builds tstr.
return tstr
def ts_to_datetime(ts):
"""Take timestamp ts in string isoformat, and convert it to a datetime
object."""
return dt
constraint = None, counthash = None):
"""Iterate through the given list of PkgFmri objects,
looking for packages matching 'pattern', based on the function
in 'matcher' and the versioning constraint described by
'constraint'. If 'matcher' is None, uses fmri subset matching
as the default. Returns a sorted list of PkgFmri objects,
newest versions first. If 'counthash' is a dictionary, instead
store the number of matched fmris for each package name which
was matched."""
if not matcher:
# 'pattern' may be a partially or fully decorated fmri; we want
# to extract its name and version to match separately against
# the catalog.
# XXX "5.11" here needs to be saner
tuples = {}
else:
assert pattern != None
ret = []
for p in pkgs:
if not pat_version or \
pat_version, constraint) or \
p.version == pat_version:
if counthash is not None:
else:
class RenamedPackage(object):
"""An in-memory representation of a rename object. This object records
information about a package that has had its name changed.
Renaming a package presents a number of challenges. The packaging
system must still be able to recognize and decode dependencies on
packages with the old name. In order for this to work correctly, the
rename record must contain both the old and new name of the package. It
is also undesirable to have a renamed package receive subsequent
versions. However, it still should be possible to publish bugfixes to
the old package lineage. This means that we must also record
versioning information at the time a package is renamed.
This versioning information allows us to determine which portions
of the version and namespace are allowed to add new versions.
If a package is re-named to the NULL package at a specific version,
this is equivalent to freezing the package. No further updates to
the version history may be made under that name. (NULL is never open)
The rename catalog format is as follows:
R <srcname> <srcversion> <destname> <destversion>
"""
"""Create a RenamedPackage object. Srcname is the original
name of the package, destname is the name this package
will take after the operation is successful.
Versionstr is the version at which this change takes place. No
versions >= version of srcname will be permitted."""
if destname == "NULL":
destversion = None
else:
if not srcversion and not destversion:
raise RenameException, \
"Must supply a source or destination version"
elif not srcversion:
elif not destversion:
else:
"""Implementing our own == function allows us to properly
check whether a rename object is in a list of renamed
objects."""
return False
return False
return False
return False
return False
return True
"""Return a FMRI that represents the destination name and
version of the renamed package."""
return None
return fm
"""Return a FMRI that represents the most recent version
of the package had it not been renamed."""
return fm