#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
#
import atexit
import collections
import errno
import os
import shutil
import six
import sys
import tempfile
import pkg
"""Base exception class for archive class errors."""
"""Used to indicate that the specified index is in a format not
supported or recognized by this version of the pkg(7) ArchiveIndex
class."""
return _("{0} is not in a supported or recognizable archive "
"""Class representing a pkg(7) archive table of contents and a set of
interfaces to populate and retrieve entries.
Entries in this file are written in the following format:
<name>NUL<offset>NUL<entry_size>NUL<size>NUL<typeflag>NULNL
<name> is a string containing the pathname of the file in the
archive. It can be up to 65,535 bytes in length.
<offset> is an unsigned long long integer containing the relative
offset in bytes of the first header block for the file in the
archive. The offset is relative to the end of the last block of
the first file in the archive.
<entry_size> is an unsigned long long integer containing the size of
the file's entry in bytes in the archive (including archive
headers and trailers for the entry).
<size> is an unsigned long long integer containing the size of the
file in bytes in the archive.
<typeflag> is a single character representing the type of the file
in the archive. Possible values are:
0 Regular File
1 Hard Link
2 Symbolic Link
5 Directory or subdirectory"""
version = None
"""Open a pkg(7) archive table of contents file.
'name' should be the absolute path of the file to use when
reading or writing index data.
'mode' indicates whether the index is being used for reading
or writing, and can be 'r' or 'w'. Appending to or updating
a table of contents file is not supported.
'version' is an optional integer value specifying the version
of the index to be read or written. If not specified, the
current version is assumed.
"""
if version is None:
raise InvalidArchiveIndex(name)
try:
except IOError as e:
if e.errno:
raise
# Underlying gzip library raises this exception if the
# file isn't a valid gzip file. So, assume that if
# errno isn't set, this is a gzip error instead.
raise InvalidArchiveIndex(name)
"""Context handler that ensures archive is automatically closed
in a non-error condition scenario. This enables 'with' usage.
"""
# Only close filehandles in an error condition.
else:
# Close archive normally in all other cases.
"""The absolute path of the archive index file."""
"""Add an entry for the given archive file to the table of
contents."""
# GzipFile.write requires bytes input
"""Returns a generator that yields tuples of the form (name,
offset) for each file in the index."""
l = None
try:
# Under Python 3, indexing on a bytes will
# return an integer representing the
# unicode code point of that character; we
# need to use slicing to get the character.
# Filename contained newline.
if l is None:
l = line
else:
l += b"\n"
l += line
continue
elif l is None:
l = line
l = None
except ValueError:
except IOError as e:
if e.errno:
raise
# Underlying gzip library raises this exception if the
# file isn't a valid gzip file. So, assume that if
# errno isn't set, this is a gzip error instead.
"""Close the index. No further operations can be performed
using this object once closed."""
return
"""Used to indicate that the specified archive is in a format not
supported or recognized by this version of the pkg(7) Archive class.
"""
return _("Archive {0} is missing, unsupported, or corrupt.").format(
"""Used to indicate that the specified file(s) could not be found in the
archive.
"""
return _("Package archive {arc_name} contains corrupt "
"entries for the requested package file(s):\n{files}.").format(
"""Used to indicate that the specified file(s) could not be found in the
archive.
"""
return _("Package archive {arc_name} does not contain the "
"requested package file(s):\n{files}.").format(
"""Used to indicate that a manifest for the specified package could not
be found in the archive.
"""
return _("No package manifest for package '{pfmri}' exists "
"""Class representing a pkg(7) archive and a set of interfaces to
populate it and retrieve data from it.
This class stores package data in pax archives in version 4 repository
format. Encoding the structure of a repository into the archive is
necessary to enable easy composition of package archive contents with
existing repositories and to enable consumers to access the contents of
a package archive the same as they would a repository.
This class can be used to access or extract the contents of almost any
tar archive, except for those that are compressed.
"""
__index = None
__arc_tfile = None
__arc_file = None
version = None
# If the repository format changes, then the version of the package
# archive format should be rev'd and this updated. (Although that isn't
# strictly necessary, as the Repository class should remain backwards
# compatible with this format.)
"""'pathname' is the absolute path of the archive file to create
or read from.
'mode' is a string used to indicate whether the archive is being
opened for reading or writing, which is indicated by 'r' and 'w'
respectively. An archive opened for writing may not be used for
any extraction operations, and must not already exist.
'archive_index', if supplied is the dictionary returned by
self.get_index(), allowing multiple Archive objects to be open,
sharing the same index object, for efficient use of memory.
Using an existing archive_index requires mode='r'.
"""
# Used to cache publisher objects.
# Used to cache location of publisher catalog data.
self.__catalogs = {}
mode += ":"
assert "a" not in mode
if "w" in mode:
# Don't allow overwrite of existing archive.
# Ensure we're not sharing an index object.
assert not archive_index
try:
128*1024)
except EnvironmentError as e:
raise apx._convert_error(e)
# Ensure cleanup is performed on exit if the archive is not
# explicitly closed.
def arc_cleanup():
return
# Open the pax archive for the package.
try:
except EnvironmentError as e:
raise apx._convert_error(e)
except Exception:
# Likely not an archive or the archive is corrupt.
self.__extract_offsets = {}
if "r" in mode:
# Opening the tarfile loaded the first member, which
# should be the archive index file.
if not member:
# Archive is empty.
# If we have an archive_index use that and return
# immediately. We assume that the caller has obtained
# the index from an existing Archive object,
# and will have validated the version of that archive.
if archive_index:
return
return
else:
return
try:
except (IndexError, ValueError):
# Create a temporary file to extract the index to,
# and then extract it from the archive.
try:
# Read error encountered.
except EnvironmentError as e:
raise apx._convert_error(e)
# After extraction, the current archive file offset
# is the base that will be used for all other
# extractions.
# Load archive index.
try:
except InvalidArchiveIndex:
# Index is corrupt; rather than driving on
# and failing later, bail now.
except EnvironmentError as e:
raise apx._convert_error(e)
elif "w" in mode:
# Force normalization of archive member mode and
# ownership information during archive creation.
"pkg5.archive.version.{0:d}".format(
return ti
# Create a temporary file to write the index to,
# and then create the index.
# Used to determine what the default publisher will be
# for the archive file at close().
# Used to keep track of which package files have already
# been added to archive.
# Always create archives using current version.
# Always add base publisher directory to start; tarfile
# requires an actual filesystem object to do this, so
# re-use an existing directory to do so.
"""Context handler that ensures archive is automatically closed
in a non-error condition scenario. This enables 'with' usage.
"""
# Only close file objects; don't actually write anything
# out in an error condition.
return
"""Private helper method to find offsets for individual archive
member extraction.
"""
return
# This causes the entire archive to be read, but is the only way
# to find the offsets to extract everything.
try:
# Read error encountered.
except EnvironmentError as e:
raise apx._convert_error(e)
"""Creates a temporary directory for use during archive
operations, and return its absolute path. The temporary
directory will be removed after the archive is closed.
"""
try:
except EnvironmentError as e:
raise apx._convert_error(e)
"""Creates a temporary file for use during archive operations,
and returns a file object for it and its absolute path. The
temporary file will be removed after the archive is closed.
"""
try:
except EnvironmentError as e:
raise apx._convert_error(e)
"""Queue the specified object for addition to the archive.
The archive will be created and the object added to it when the
close() method is called. The target object must not change
after this method is called while the archive is open. The
item being added must not already exist in the archive.
'pathname' is an optional string specifying the absolute path
of a file to add to the archive. The file may be a regular
file, directory, symbolic link, or hard link.
'arcname' is an optional string specifying an alternative name
for the file in the archive. If not given, the full pathname
provided will be used.
"""
# Pre-calculate size of archive entry by determining where
# in the archive the entry would be added.
if rem > 0:
blocks += 1
# Record name, offset, entry_size, size type for each file.
# Discard tarinfo; it would be more efficient to keep these in
# memory, but at a significant memory footprint cost.
del ti
repo=None):
"""Private helper function for adding package files."""
# Directory entry needs to be added
# for package files.
# Directory entry needs to be added
# for hash directory.
# Already added for a different
# package.
continue
if repo:
else:
# A bit expensive potentially in terms of
# memory usage, but necessary to prevent
# duplicate archive entries.
"""Private helper function that queues a package for addition to
the archive.
'mpath' is the absolute path of the package manifest file.
'fpath' is an optional directory containing the package files
stored by hash.
'repo' is an optional Repository object to use to retrieve the
data for the package to be added to the archive.
'fpath' or 'repo' must be provided.
"""
assert mpath
if not self.__default_pub:
# Throughout this function, the archive root directory is used
# as a template to add other directories that should be present
# in the archive. This is necessary as the tarfile class does
# not support adding arbitrary archive entries without a real
# filesystem object as a source.
if d not in self.__processed_pfiles:
# After manifest has been loaded, assume it's ok to queue the
# manifest itself for addition to the archive.
# Entry may need to be added for manifest directory.
# Entry needs to be added for manifest file.
# Now add any files to the archive for every action that has a
# payload. (That payload can consist of multiple files.)
for a in m.gen_actions():
if not a.has_payload:
# Nothing to archive.
continue
if not hval:
# Nothing to archive.
continue
# Signature actions require special handling.
if a.name == "signature":
for c in a.get_chain_certs(
if repo:
# This bit of logic only possible if
# package source is a repository.
None)
if not pub:
assert pub
if not payloads:
# Nothing more to do.
continue
"""Queues the specified package for addition to the archive.
The archive will be created and the package added to it when
the close() method is called. The package contents must not
change after this method is called while the archive is open.
'pfmri' is the FMRI string or object identifying the package to
add.
'mpath' is the absolute path of the package manifest file.
'fpath' is the directory containing the package files stored
by hash.
"""
"""Queues the specified package in a repository for addition to
the archive. The archive will be created and the package added
to it when the close() method is called. The package contents
must not change after this method is called while the archive is
open.
'pfmri' is the FMRI string or object identifying the package to
add.
'repo' is the Repository object to use to retrieve the data for
the package to be added to the archive.
"""
"""Extract the named v1 catalog part to the specified directory.
'part' is the name of the catalog file part.
'path' is the absolute path of the directory to extract the
file to. It will be created automatically if it does not
exist.
'pub' is an optional publisher prefix. If not provided, the
first publisher catalog found in the archive will be used.
"""
# If the extraction index doesn't exist, scan the
# complete archive and build one.
pubs = [
p for p in self.get_publishers()
]
if not pubs:
if not pub:
# Default to first known publisher.
# Expected locations in archive for various metadata.
# A trailing slash is appended so that archive entry
# comparisons skip the entries for the directory.
# Catalog file requested for this publisher before.
if croot:
# Catalog data is cached because it was
# generated on demand, so just copy it
# from there to the destination.
raise UnknownArchiveFiles(
try:
except EnvironmentError as e:
raise apx._convert_error(e)
else:
# Use default extraction logic.
return
# Determine whether any catalog files are present for this
# publisher in the archive.
# Any catalog file at all means this publisher
# should be marked as being known to have one
# and then the request passed on to extract_to.
# No catalog data found for publisher; construct a catalog
# in memory based on packages found for publisher.
lm = None
# Store catalog in a temporary directory and mark publisher
# as having catalog data cached.
if lm:
# Finally, copy requested file to destination.
try:
except EnvironmentError as e:
raise apx._convert_error(e)
"""Extract one or more package files from the archive.
'hashes' is a list of the files to extract named by their hash.
'path' is the absolute path of the directory to extract the
files to. It will be created automatically if it does not
exist.
'pub' is the prefix (name) of the publisher that the package
files are associated with. If not provided, the first file
named after the given hash found in the archive will be used.
(This will be noticeably slower depending on the size of the
archive.)
"""
assert hashes
# If the extraction index doesn't exist, scan the complete
# archive and build one.
if not pub:
# Scan extract offsets index for the first instance of
# any package file seen for each hash and extract the
# file as each is found.
break
if not hashes:
break
if hashes:
# Any remaining hashes are for package files
# that couldn't be found.
return
"""Extract a package manifest from the archive.
'pfmri' is the FMRI string or object identifying the package
manifest to extract.
'path' is the absolute path of the directory to extract the
manifest to. It will be created automatically if it does not
exist.
'filename' is an optional name to use for the extracted file.
If not provided, the default behaviour is to create a directory
named after the package stem in 'path' and a file named after
the version in that directory; both components will be URI
encoded.
"""
if not filename:
try:
except UnknownArchiveFiles:
"""Extract a member from the archive.
'src' is the pathname of the archive file to extract.
'path' is the absolute path of the directory to extract the file
to.
'filename' is an optional string indicating the name to use for
the extracted file. If not provided, the full member name in
the archive will be used.
"""
# Get the offset in the archive for the given file, and then
# seek to it.
if offset is not None:
# Prepare the tarfile object for extraction by telling
# it where to look for the file.
# Get the tarinfo object needed to extract the file.
try:
# Read error encountered.
except EnvironmentError as e:
raise apx._convert_error(e)
# Index must be invalid or tarfile has gone off
# the rails trying to read the archive.
elif self.__extract_offsets:
# Assume there is no such archive member if extract
# offsets are known, but the item can't be found.
else:
# No archive index; fallback to retrieval by name.
# Extract the file to the specified location.
try:
except KeyError:
# Read error encountered.
except EnvironmentError as e:
raise apx._convert_error(e)
# Nothing more to do.
return
# If possible, validate the size of the extracted object.
try:
if not filename:
[src])
except EnvironmentError as e:
raise apx._convert_error(e)
"""Returns an archive member as a file object. If the matching
member is a regular file, a file-like object will be returned.
If it is a link, a file-like object is constructed from the
link's target. In all other cases, None will be returned. The
file-like object is read-only and provides methods: read(),
readline(), readlines(), seek() and tell(). The returned object
must be closed before the archive is, and must not be used after
the archive is closed.
'src' is the pathname of the archive file to return.
"""
# Get the offset in the archive for the given file, and then
# seek to it.
if offset is not None:
# Prepare the tarfile object for extraction by telling
# it where to look for the file.
try:
# Get the tarinfo object needed to extract the
# file.
# Read error encountered.
elif self.__extract_offsets:
# Assume there is no such archive member if extract
# offsets are known, but the item can't be found.
else:
# No archive index; fallback to retrieval by name.
# Finally, return the object for the matching archive member.
try:
except KeyError:
"""Returns the index, and extract_offsets from an Archive
opened in read-only mode, allowing additional Archive objects
to reuse the index, in a memory-efficient manner."""
if not self.__extract_offsets:
# If the extraction index doesn't exist, scan the
# complete archive and build one.
return self.__extract_offsets
"""Returns the first package file matching the given hash as a
file-like object. The file-like object is read-only and provides
methods: read(), readline(), readlines(), seek() and tell().
The returned object must be closed before the archive is, and
must not be used after the archive is closed.
'fhash' is the hash name of the file to return.
'pub' is the prefix (name) of the publisher that the package
files are associated with. If not provided, the first file
named after the given hash found in the archive will be used.
(This will be noticeably slower depending on the size of the
archive.)
"""
if not self.__extract_offsets:
# If the extraction index doesn't exist, scan the
# complete archive and build one.
if not pub:
# Scan extract offsets index for the first instance of
# any package file seen for the hash and extract it.
"""Returns a package manifest from the archive.
'pfmri' is the FMRI string or object identifying the package
manifest to extract.
'raw' is an optional boolean indicating whether the raw
content of the Manifest should be returned. If True,
a file-like object containing the content of the manifest.
If False, a Manifest object will be returned.
"""
assert pfmri
try:
except UnknownArchiveFiles:
if raw:
return fobj
return m
"""Return a list of publisher objects for all publishers used
in the archive."""
# If the extraction index doesn't exist, scan the complete
# archive and build one.
# Search through offset index to find publishers
# in use.
# See if this publisher has a .p5i file in the
# archive (needed for signed packages).
"pub.p5i")
try:
except UnknownArchiveFiles:
# No p5i; that's ok.
pfx)
else:
assert pub
"""Private helper method to cleanup temporary files."""
try:
except EnvironmentError as e:
raise apx._convert_error(e)
"""Private helper method to close filehandles."""
# Some archives may not have an index.
# A read error during archive load may cause these to have
# never been set.
if self.__arc_tfile:
self.__arc_tfile = None
if self.__arc_file:
self.__arc_file = None
"""If mode is 'r', this will close the archive file. If mode is
'w', this will write all queued files to the archive and close
it. Further operations on the archive are not possible after
calling this function."""
return
# Add the standard pkg5.repository file before closing the
# index.
"[publisher]\nprefix = {0}\n\n"
# If any publisher objects were cached, then there were
# signed packages present, and p5i information for each
# must be added to the archive.
# A new publisher object is created with a copy of only
# the information that's needed for the archive.
# Create a p5i file.
# Queue the p5i file for addition to the archive.
"pub.p5i")
# Close the index; no more entries can be added.
# If a tracker was provided, setup a progress goal.
idxbytes = 0
if progtrack:
try:
nfiles += 1
except EnvironmentError as e:
raise apx._convert_error(e)
# Add the index file to the archive as the first file; it will
# automatically be marked with a comment identifying the index
# version.
if progtrack:
# Add all queued files to the archive.
# tarfile caches member information for every item
# added by default, which provides fast access to the
# archive contents after generation, but isn't needed
# here (and uses a significant amount of memory).
# Plus popping it off the stack here allows use of
# the object's info to provide progress updates.
if progtrack:
del ti
# Cleanup temporary files.
# Archive created; success!
if progtrack:
"""The absolute path of the archive file."""
return self.__arc_name