catalog.py revision 207
23N/A#!/usr/bin/python
23N/A#
23N/A# CDDL HEADER START
23N/A#
23N/A# The contents of this file are subject to the terms of the
23N/A# Common Development and Distribution License (the "License").
23N/A# You may not use this file except in compliance with the License.
23N/A#
23N/A# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
23N/A# or http://www.opensolaris.org/os/licensing.
23N/A# See the License for the specific language governing permissions
23N/A# and limitations under the License.
23N/A#
23N/A# When distributing Covered Code, include this CDDL HEADER in each
23N/A# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
23N/A# If applicable, add the following below this CDDL HEADER, with the
23N/A# fields enclosed by brackets "[]" replaced with your own identifying
23N/A# information: Portions Copyright [yyyy] [name of copyright owner]
23N/A#
23N/A# CDDL HEADER END
23N/A#
221N/A# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23N/A# Use is subject to license terms.
23N/A
221N/Aimport os
221N/Aimport re
221N/Aimport sha
23N/Aimport shutil
23N/Aimport time
23N/Aimport urllib
174N/Aimport tempfile
204N/Aimport errno
204N/Aimport dbm
204N/Aimport signal
215N/Aimport threading
264N/A
270N/Aimport pkg.fmri as fmri
23N/Aimport pkg.version as version
23N/Aimport pkg.manifest as manifest
157N/Afrom pkg.subprocess_method import Mopen, PIPE
204N/A
class CatalogException(Exception):
    """Exception raised for errors in catalog operations."""

    def __init__(self, args=None):
        self.args = args
157N/A
class Catalog(object):
    """A Catalog is the representation of the package FMRIs available to
    this client or repository.  Both purposes utilize the same storage
    format.

    The serialized structure of the repository is an unordered list of
    available package versions, followed by an unordered list of
    incorporation relationships between packages.  This latter section
    allows the graph to be topologically sorted by the client.

    S Last-Modified: [timespec]

    XXX A authority mirror-uri ...
    XXX ...

    V fmri
    V fmri
    ...
    C fmri
    C fmri
    ...
    I fmri fmri
    I fmri fmri
    ...
    """

    # XXX Mirroring records also need to be allowed from client
    # configuration, and not just catalogs.
    #
    # XXX It would be nice to include available tags and package sizes,
    # although this could also be calculated from the set of manifests.
    #
    # XXX Current code is O(N_packages) O(M_versions), should be
    # O(1) O(M_versions), and possibly O(1) O(1).
    #
    # XXX Initial estimates suggest that the Catalog could be composed of
    # 1e5 - 1e7 lines.  Catalogs across these magnitudes will need to be
    # spread out into chunks, and may require a delta-oriented update
    # interface.

    def __init__(self, cat_root, authority = None, pkg_root = None):
        """Create a catalog.  If the path supplied does not exist,
        this will create the required directory structure.
        Otherwise, if the directories are already in place, the
        existing catalog is opened.  If pkg_root is specified
        and no catalog is found at cat_root, the catalog will be
        rebuilt.  authority names the authority that
        is represented by this catalog."""

        self.catalog_root = cat_root
        self.attrs = {}
        self.auth = authority
        # Handle on the forked search-db rebuild, if one is running.
        self.searchdb_update_handle = None
        self.searchdb = None
        self._search_available = False
        self.deferred_searchdb_updates = []
        # We need to lock the search database against multiple
        # simultaneous updates from separate threads closing
        # publication transactions.
        self.searchdb_lock = threading.Lock()
        self.pkg_root = pkg_root
        if self.pkg_root:
            # The search db lives beside (not inside) pkg_root.
            self.searchdb_file = os.path.dirname(self.pkg_root) + \
                "/search"

        self.attrs["npkgs"] = 0

        if not os.path.exists(cat_root):
            os.makedirs(cat_root)

        # (The previous revision computed the catalog file path here
        # but never used it; that dead local has been removed.)

        if pkg_root is not None:
            self.build_catalog()
            self.set_time()
            self.save_attrs()

        self.load_attrs()
204N/A
204N/A def add_fmri(self, fmri, critical = False):
34N/A """Add a package, named by the fmri, to the catalog.
157N/A Throws an exception if an identical package is already
157N/A present. Throws an exception if package has no version."""
157N/A if fmri.version == None:
221N/A raise CatalogException, \
157N/A "Unversioned FMRI not supported: %s" % fmri
270N/A
204N/A if critical:
204N/A pkgstr = "C %s\n" % fmri.get_fmri(anarchy = True)
157N/A else:
195N/A pkgstr = "V %s\n" % fmri.get_fmri(anarchy = True)
220N/A
50N/A pathstr = os.path.normpath(os.path.join(self.catalog_root,
270N/A "catalog"))
270N/A
270N/A pfile = file(pathstr, "a+")
270N/A pfile.seek(0)
270N/A
270N/A for entry in pfile:
270N/A if entry == pkgstr:
270N/A pfile.close()
270N/A raise CatalogException, \
270N/A "Package %s is already in the catalog" % \
270N/A fmri
270N/A
270N/A pfile.write(pkgstr)
270N/A pfile.close()
270N/A
270N/A self.attrs["npkgs"] += 1
270N/A
270N/A self.set_time()
270N/A self.save_attrs()
270N/A
270N/A def attrs_as_lines(self):
270N/A """Takes the list of in-memory attributes and returns
270N/A a list of strings, each string naming an attribute."""
157N/A
157N/A ret = []
157N/A
157N/A for k,v in self.attrs.items():
157N/A s = "S %s: %s\n" % (k, v)
157N/A ret.append(s)
157N/A
26N/A return ret
258N/A
258N/A def _fmri_from_path(self, pkg, vers):
258N/A """Helper method that takes the full path to the package
258N/A directory and the name of the manifest file, and returns an FMRI
258N/A constructed from the information in those components."""
258N/A
258N/A v = version.Version(urllib.unquote(vers), None)
258N/A f = fmri.PkgFmri(urllib.unquote(os.path.basename(pkg)), None)
50N/A f.version = v
157N/A return f
157N/A
157N/A def build_catalog(self):
34N/A """Walk the on-disk package data and build (or rebuild) the
157N/A package catalog and search database."""
157N/A try:
157N/A idx_mtime = \
157N/A os.stat(self.searchdb_file + ".pag").st_mtime
157N/A except OSError, e:
36N/A if e.errno != errno.ENOENT:
157N/A raise
157N/A idx_mtime = 0
157N/A
157N/A try:
207N/A cat_mtime = os.stat(os.path.join(
207N/A self.catalog_root, "catalog")).st_mtime
157N/A except OSError, e:
157N/A if e.errno != errno.ENOENT:
157N/A raise
30N/A cat_mtime = 0
157N/A
50N/A fmri_list = []
215N/A
215N/A # XXX eschew os.walk in favor of another os.listdir here?
215N/A tree = os.walk(self.pkg_root)
270N/A for pkg in tree:
270N/A if pkg[0] == self.pkg_root:
270N/A continue
270N/A
215N/A for e in os.listdir(pkg[0]):
157N/A ver_mtime = os.stat(os.path.join(
220N/A self.pkg_root, pkg[0], e)).st_mtime
220N/A
220N/A # XXX force a rebuild despite mtimes?
220N/A # XXX queue this and fork later?
220N/A if ver_mtime > cat_mtime:
220N/A f = self._fmri_from_path(pkg[0], e)
220N/A
220N/A self.add_fmri(f)
220N/A print f
220N/A
157N/A # XXX force a rebuild despite mtimes?
157N/A # If the database doesn't exist, don't bother
157N/A # building the list; we'll just build it all.
50N/A if ver_mtime > idx_mtime > 0:
50N/A fmri_list.append((pkg[0], e))
50N/A
157N/A # If we have no updates to make to the search database but it
157N/A # already exists, just make it available. If we do have updates
157N/A # to make (including possibly building it from scratch), fork it
157N/A # off into another process; when that's done, we'll mark it
157N/A # available.
157N/A if not fmri_list and idx_mtime > 0:
204N/A self.searchdb = dbm.open(self.searchdb_file, "w")
204N/A self._search_available = True
204N/A else:
204N/A signal.signal(signal.SIGCHLD, self.child_handler)
204N/A self.searchdb_update_handle = \
204N/A Mopen(self.update_searchdb, [fmri_list], {},
204N/A stderr = PIPE)
204N/A
    def child_handler(self, sig, frame):
        """Handler method for the SIGCLD signal.  Checks to see if the
        search database update child has finished, and enables searching
        if it finished successfully, or logs an error if it didn't."""
        # No rebuild in flight; nothing to reap.
        if not self.searchdb_update_handle:
            return

        rc = self.searchdb_update_handle.poll()
        if rc == 0:
            # Child exited cleanly: open the freshly built database
            # and mark search as available.
            self.searchdb = dbm.open(self.searchdb_file, "w")
            self._search_available = True
            self.searchdb_update_handle = None
            # Apply any updates that were deferred while the child
            # was running.
            if self.deferred_searchdb_updates:
                self.update_searchdb(
                    self.deferred_searchdb_updates)
        elif rc > 0:
            # Child failed; surface its stderr.
            # XXX This should be logged instead
            print "ERROR building search database:"
            print self.searchdb_update_handle.stderr.read()
220N/A
220N/A def update_searchdb(self, fmri_list):
220N/A """Update the search database with the FMRIs passed in via
220N/A 'fmri_list'. If 'fmri_list' is empty or None, then rebuild the
220N/A database from scratch. 'fmri_list' should be a list of tuples
220N/A where the first element is the full path to the package name in
220N/A pkg_root and the second element is the version string."""
220N/A
220N/A # If we're in the process of updating the database in our
220N/A # separate process, and this particular update until that's
220N/A # done.
220N/A if self.searchdb_update_handle:
220N/A self.deferred_searchdb_updates += fmri_list
220N/A return
220N/A
220N/A self.searchdb_lock.acquire()
220N/A
220N/A new = False
220N/A if fmri_list:
220N/A if not self.searchdb:
220N/A self.searchdb = \
220N/A dbm.open(self.searchdb_file, "c")
220N/A
204N/A if not self.searchdb.has_key("indir_num"):
204N/A self.searchdb["indir_num"] = "0"
204N/A else:
204N/A # new = True
204N/A self.searchdb = dbm.open(self.searchdb_file, "n")
204N/A self.searchdb["indir_num"] = "0"
204N/A # XXX We should probably iterate over the catalog, for
204N/A # cases where manifests have stuck around, but have been
204N/A # moved to historical and removed from the catalog.
204N/A fmri_list = (
204N/A (os.path.join(self.pkg_root, pkg), ver)
204N/A for pkg in os.listdir(self.pkg_root)
204N/A for ver in os.listdir(
204N/A os.path.join(self.pkg_root, pkg))
204N/A )
204N/A
204N/A for pkg, vers in fmri_list:
204N/A mfst_path = os.path.join(pkg, vers)
204N/A mfst = manifest.Manifest()
204N/A mfst_file = file(mfst_path)
157N/A mfst.set_content(mfst_file.read())
157N/A mfst_file.close()
204N/A
157N/A f = self._fmri_from_path(pkg, vers)
204N/A
157N/A self.update_index(f, mfst.search_dict())
157N/A
157N/A self.searchdb_lock.release()
204N/A
204N/A # If we rebuilt the database from scratch ... XXX why would we
204N/A # want to do this?
204N/A # if new:
204N/A # self.searchdb.close()
204N/A # self.searchdb = None
204N/A self._search_available = True
204N/A
204N/A # Five digits of a base-62 number represents a little over 900 million.
204N/A # Assuming 1 million tokens used in a WOS build (current imports use
204N/A # just short of 500k, but we don't have all the l10n packages, and may
204N/A # not have all the search tokens we want) and keeping every nightly
204N/A # build gives us 2.5 years before we run out of token space. We're
204N/A # likely to garbage collect manifests and rebuild the db before then.
204N/A #
204N/A # XXX We're eventually going to run into conflicts with real tokens
204N/A # here. This is unlikely until we hit, say "alias", which is a ways
204N/A # off, but we should still look at solving this.
204N/A idx_tok_len = 5
204N/A
204N/A def next_token(self):
204N/A alphabet = "abcdefghijklmnopqrstuvwxyz"
204N/A k = "0123456789" + alphabet + alphabet.upper()
264N/A
264N/A num = int(self.searchdb["indir_num"])
264N/A
264N/A s = ""
264N/A for i in range(1, self.idx_tok_len + 1):
264N/A junk, tail = divmod(num, 62 ** i)
264N/A idx, junk = divmod(tail, 62 ** (i - 1))
264N/A s = k[idx] + s
264N/A
264N/A # XXX Do we want to log warnings as we approach index capacity?
264N/A self.searchdb["indir_num"] = \
264N/A str(int(self.searchdb["indir_num"]) + 1)
264N/A
264N/A return s
264N/A
    def update_index(self, fmri, search_dict):
        """Update the search database with the data from the manifest
        for 'fmri', which has been collected into 'search_dict'"""
        # self.searchdb: token -> (type, fmri, action)
        # XXX search_dict doesn't have action info, but should

        # Don't update the database if it already has this FMRI's
        # indices.
        if self.searchdb.has_key(str(fmri)):
            return

        # Record the FMRI itself so the check above short-circuits
        # future calls for the same package.
        self.searchdb[str(fmri)] = "True"
        for tok_type in search_dict.keys():
            for tok in search_dict[tok_type]:
                # XXX The database files are so damned huge (if
                # holey) because we have zillions of copies of
                # the full fmri strings.  We might want to
                # indirect these as well.
                s = "%s %s" % (tok_type, fmri)
                # Store the payload under a fresh indirect token,
                # then hang that token off the search token's
                # chain (see update_chain).
                s_ptr = self.next_token()
                self.searchdb[s_ptr] = s

                self.update_chain(tok, s_ptr)
264N/A
    def update_chain(self, token, data_token):
        """Because of the size limitations of the underlying database
        records, not only do we have to store pointers to the actual
        search data, but once the pointer records fill up, we have to
        chain those records up to spillover records.  This method adds
        the pointer to the data to the end of the last link in the
        chain, overflowing as necessary.  The search token is passed in
        as 'token', and the pointer to the actual data which should be
        returned is passed in as 'data_token'."""

        while True:
            # A token with no record yet starts an empty chain.
            try:
                cur = self.searchdb[token]
            except KeyError:
                cur = ""
            l = len(cur)

            # According to the ndbm man page, the total length of
            # key and value must be less than 1024.  Seems like the
            # actual value is 1018, probably due to some padding or
            # accounting bytes or something.  The 2 is for the space
            # separator and the plus-sign for the extension token.
            # XXX The comparison should be against 1017, but that
            # crashes in the if clause below trying to append the
            # extension token.  Dunno why.
            if len(token) + l + self.idx_tok_len + 2 > 1000:
                # If we're adding the first element in the next
                # link of the chain, add the extension token to
                # the end of this link, and put the token
                # pointing to the data at the beginning of the
                # next link.
                if cur[-(self.idx_tok_len + 1)] != "+":
                    nindir_tok = "+" + self.next_token()
                    self.searchdb[token] += " " + nindir_tok
                    self.searchdb[nindir_tok] = data_token
                    break # from while True; we're done
                # If we find an extension token, start looking
                # at the next chain link.
                else:
                    # Last idx_tok_len + 1 chars are the "+"
                    # extension token; follow it.
                    token = cur[-(self.idx_tok_len + 1):]
                    continue

            # If we get here, it's safe to append the data token to
            # the current link, and get out.
            if cur:
                self.searchdb[token] += " " + data_token
            else:
                self.searchdb[token] = data_token
            break
204N/A
264N/A def search(self, token):
264N/A """Search through the search database for 'token'. Return a
264N/A list of token type / fmri pairs."""
264N/A ret = []
264N/A
264N/A while True:
264N/A # For each indirect token in the search token's value,
264N/A # add its value to the return list. If we see a chain
264N/A # token, switch to its value and continue. If we fall
264N/A # out of the loop without seeing a chain token, we can
204N/A # return.
204N/A for tok in self.searchdb[token].split():
204N/A if tok[0] == "+":
204N/A token = tok
204N/A break
204N/A else:
204N/A ret.append(
204N/A self.searchdb[tok].split(" ", 1))
204N/A else:
204N/A return ret
204N/A
204N/A def get_matching_fmris(self, patterns, matcher = None,
204N/A constraint = None, counthash = None):
204N/A """Iterate through the catalog, looking for packages matching
204N/A 'pattern', based on the function in 'matcher' and the versioning
204N/A constraint described by 'constraint'. If 'matcher' is None,
204N/A uses fmri subset matching as the default. Returns a sorted list
204N/A of PkgFmri objects, newest versions first. If 'counthash' is a
204N/A dictionary, instead store the number of matched fmris for each
204N/A package name which was matched."""
204N/A
204N/A cat_auth = self.auth
265N/A
265N/A if not matcher:
265N/A matcher = fmri.fmri_match
265N/A
265N/A if not isinstance(patterns, list):
265N/A patterns = [ patterns ]
265N/A
265N/A # 'pattern' may be a partially or fully decorated fmri; we want
265N/A # to extract its name and version to match separately against
265N/A # the catalog.
265N/A # XXX "5.11" here needs to be saner
265N/A tuples = {}
265N/A
265N/A for pattern in patterns:
265N/A if isinstance(pattern, fmri.PkgFmri):
265N/A tuples[pattern] = pattern.tuple()
265N/A else:
265N/A tuples[pattern] = \
265N/A fmri.PkgFmri(pattern, "5.11").tuple()
265N/A
204N/A ret = []
204N/A
204N/A try:
204N/A pfile = file(os.path.normpath(
204N/A os.path.join(self.catalog_root, "catalog")), "r")
204N/A except IOError, e:
204N/A if e.errno == errno.ENOENT:
157N/A return ret
204N/A else:
204N/A raise
204N/A
204N/A for entry in pfile:
204N/A try:
204N/A cv, pkg, cat_name, cat_version = entry.split()
204N/A except ValueError:
204N/A # Handle old two-column catalog file, mostly in
204N/A # use on server.
204N/A cv, cat_fmri = entry.split()
204N/A pkg = "pkg"
204N/A cat_auth, cat_name, cat_version = \
204N/A fmri.PkgFmri(cat_fmri, "5.11",
204N/A authority = self.auth).tuple()
204N/A
204N/A for pattern in patterns:
204N/A pat_auth, pat_name, pat_version = tuples[pattern]
204N/A if pkg == "pkg" and \
204N/A (pat_auth == cat_auth or not pat_auth) and \
204N/A matcher(cat_name, pat_name):
204N/A pkgfmri = fmri.PkgFmri("%s@%s" %
204N/A (cat_name, cat_version),
204N/A authority = cat_auth)
204N/A if not pat_version or \
204N/A pkgfmri.version.is_successor(
204N/A pat_version, constraint) or \
204N/A pkgfmri.version == pat_version:
204N/A if counthash is not None:
204N/A if pattern in counthash:
204N/A counthash[pattern] += 1
204N/A else:
204N/A counthash[pattern] = 1
204N/A ret.append(pkgfmri)
204N/A
204N/A pfile.close()
204N/A
204N/A return sorted(ret, reverse = True)
204N/A
    def fmris(self):
        """A generator function that produces FMRIs as it
        iterates over the contents of the catalog."""

        try:
            pfile = file(os.path.normpath(
                os.path.join(self.catalog_root, "catalog")), "r")
        except IOError, e:
            if e.errno == errno.ENOENT:
                # Missing catalog means no packages; yield nothing.
                return
            else:
                raise

        for entry in pfile:
            try:
                # Current four-column format: tag, "pkg", name,
                # version.
                cv, pkg, cat_name, cat_version = entry.split()
                if pkg == "pkg":
                    yield fmri.PkgFmri("%s@%s" %
                        (cat_name, cat_version),
                        authority = self.auth)
            except ValueError:
                # Handle old two-column catalog file, mostly in
                # use on server.
                cv, cat_fmri = entry.split()
                yield fmri.PkgFmri(cat_fmri,
                    authority = self.auth)

        # NOTE(review): the file is closed only when the generator is
        # exhausted; an abandoned generator leaks the handle.
        pfile.close()
204N/A
204N/A def load_attrs(self, filenm = "attrs"):
204N/A """Load attributes from the catalog file into the in-memory
204N/A attributes dictionary"""
204N/A
204N/A apath = os.path.normpath(
204N/A os.path.join(self.catalog_root, filenm))
204N/A if not os.path.exists(apath):
204N/A return
204N/A
204N/A afile = file(apath, "r")
204N/A attrre = re.compile('^S ([^:]*): (.*)')
204N/A
204N/A for entry in afile:
204N/A m = attrre.match(entry)
204N/A if m != None:
204N/A self.attrs[m.group(1)] = m.group(2)
204N/A
204N/A afile.close()
204N/A
204N/A if "npkgs" in self.attrs:
204N/A self.attrs["npkgs"] = int(self.attrs["npkgs"])
204N/A
204N/A def npkgs(self):
204N/A """Returns the number of packages in the catalog."""
204N/A
204N/A return self.attrs["npkgs"]
204N/A
    @staticmethod
    def recv(filep, path):
        """A static method that takes a file-like object and
        a path.  This is the other half of catalog.send().  It
        reads a stream as an incoming catalog and lays it down
        on disk."""

        if not os.path.exists(path):
            os.makedirs(path)

        attrf = file(os.path.normpath(
            os.path.join(path, "attrs")), "w+")
        catf = file(os.path.normpath(
            os.path.join(path, "catalog")), "w+")

        for s in filep:
            if s.startswith("S "):
                # Attribute lines are copied verbatim.
                attrf.write(s)
            else:
                # Package line: re-emit in the four-column
                # format (tag, "pkg", name, version).
                # XXX Need to be able to handle old and new
                # format catalogs.
                f = fmri.PkgFmri(s[2:])
                catf.write("%s %s %s %s\n" %
                    (s[0], "pkg", f.pkg_name, f.version))

        attrf.close()
        catf.close()
204N/A
204N/A def save_attrs(self, filenm = "attrs"):
204N/A """Save attributes from the in-memory catalog to a file
157N/A specified by filenm."""
161N/A
161N/A afile = file(os.path.normpath(
161N/A os.path.join(self.catalog_root, filenm)), "w+")
161N/A for a in self.attrs.keys():
161N/A s = "S %s: %s\n" % (a, self.attrs[a])
161N/A afile.write(s)
161N/A
161N/A afile.close()
161N/A
    def send(self, filep):
        """Send the contents of this catalog out to the filep
        specified as an argument."""

        # Send attributes first.
        filep.writelines(self.attrs_as_lines())

        try:
            cfile = file(os.path.normpath(
                os.path.join(self.catalog_root, "catalog")), "r")
        except IOError, e:
            # Missing catalog is fine; other errors need to be
            # reported.  (Attribute lines have already been sent
            # by this point.)
            if e.errno == errno.ENOENT:
                return
            else:
                raise

        # Stream the catalog entries after the attributes.
        for e in cfile:
            filep.write(e)

        cfile.close()
161N/A
161N/A def set_time(self):
161N/A self.attrs["Last-Modified"] = time.strftime("%Y%m%dT%H%M%SZ")
157N/A
270N/A def search_available(self):
270N/A return self._search_available
270N/A
270N/A
# In order to avoid a fine from the Department of Redundancy Department,
# allow these methods to be invoked without explicitly naming the Catalog
# class.
recv = Catalog.recv
270N/A