#!/usr/bin/python
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
"""
Misc utility functions used by the packaging system.
"""
from __future__ import division
from __future__ import print_function
import OpenSSL.crypto as osc
import calendar
import collections
import datetime
import errno
import fnmatch
import getopt
import locale
import os
import platform
import re
import resource
import shutil
import signal
import simplejson as json
import six
import socket
import struct
import sys
import threading
import time
import traceback
import urllib
import zlib
# ungrouped-imports: pylint: disable=C0412
from binascii import hexlify, unhexlify
from collections import defaultdict
from io import BytesIO
from operator import itemgetter
# Redefining built-in 'range'; pylint: disable=W0622
# Module 'urllib' has no 'parse' member; pylint: disable=E1101
from six.moves import range, zip_longest
from six.moves.urllib.parse import urlsplit, urlparse, urlunparse
from six.moves.urllib.request import pathname2url, url2pathname
from stat import S_IFMT, S_IMODE, S_IRGRP, S_IROTH, S_IRUSR, S_IRWXU, \
S_ISBLK, S_ISCHR, S_ISDIR, S_ISFIFO, S_ISLNK, S_ISREG, S_ISSOCK, \
S_IWUSR, S_IXGRP, S_IXOTH
import pkg.client.api_errors as api_errors
import pkg.portable as portable
import pkg.digest as digest
from pkg import VERSION
from pkg.client import global_settings
from pkg.client.debugvalues import DebugValues
from pkg.client.imagetypes import img_type_names, IMG_NONE
from pkg.pkggzip import PkgGzipFile
from pkg.client.pkgdefs import EXIT_OOPS
# Default path where the temporary directories will be created.
DEFAULT_TEMP_PATH = "/var/tmp"
# Minimum number of days to issue warning before a certificate expires
MIN_WARN_DAYS = datetime.timedelta(days=30)
# Constant string used across many modules as a property name.
SIGNATURE_POLICY = "signature-policy"
# Bug URI Constants (deprecated)
# Line too long; pylint: disable=C0301
BUG_URI_CLI = "https://defect.opensolaris.org/bz/enter_bug.cgi?product=pkg&component=cli"
BUG_URI_GUI = "https://defect.opensolaris.org/bz/enter_bug.cgi?product=pkg&component=gui"
# pylint: enable=C0301
# Comparison types
CMP_UNSIGNED = 0
CMP_ALL = 1
# Traceback message.
def get_traceback_message():
"""This function returns the standard traceback message. A function
is necessary since the _() call must be done at runtime after locale
setup."""
return _("""\n
This is an internal error in pkg(7) version {version}. Please log a
Service Request about this issue including the information above and this
message.""").format(version=VERSION)
def time_to_timestamp(t):
"""convert seconds since epoch to %Y%m%dT%H%M%SZ format"""
# XXX optimize?; pylint: disable=W0511
return time.strftime("%Y%m%dT%H%M%SZ", time.gmtime(t))
def timestamp_to_time(ts):
"""convert %Y%m%dT%H%M%SZ format to seconds since epoch"""
# XXX optimize?; pylint: disable=W0511
return calendar.timegm(time.strptime(ts, "%Y%m%dT%H%M%SZ"))
def timestamp_to_datetime(ts):
"""convert %Y%m%dT%H%M%SZ format to a datetime object"""
return datetime.datetime.strptime(ts, "%Y%m%dT%H%M%SZ")
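# Illustrative round trip between the two representations; the values below
# are simply what the epoch converts to and are shown as a usage sketch:
#   time_to_timestamp(0)                       -> "19700101T000000Z"
#   timestamp_to_time("19700101T000000Z")      -> 0
#   timestamp_to_datetime("19700101T000000Z")  -> datetime.datetime(1970, 1, 1, 0, 0)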
def copyfile(src_path, dst_path):
"""copy a file, preserving attributes, ownership, etc. where possible"""
fs = os.lstat(src_path)
shutil.copy2(src_path, dst_path)
try:
portable.chown(dst_path, fs.st_uid, fs.st_gid)
except OSError as e:
if e.errno != errno.EPERM:
raise
def copytree(src, dst):
"""Rewrite of shutil.copytree() that can handle special files such as
FIFOs, sockets, and device nodes. It re-creates all symlinks rather
than copying the data behind them, and supports neither the 'symlinks'
nor the 'ignore' keyword arguments of the shutil version.
"""
problem = None
os.makedirs(dst, PKG_DIR_MODE)
src_stat = os.stat(src)
for name in sorted(os.listdir(src)):
s_path = os.path.join(src, name)
d_path = os.path.join(dst, name)
s = os.lstat(s_path)
if S_ISDIR(s.st_mode):
copytree(s_path, d_path)
os.chmod(d_path, S_IMODE(s.st_mode))
os.chown(d_path, s.st_uid, s.st_gid)
os.utime(d_path, (s.st_atime, s.st_mtime))
elif S_ISREG(s.st_mode):
shutil.copyfile(s_path, d_path)
os.chmod(d_path, S_IMODE(s.st_mode))
os.chown(d_path, s.st_uid, s.st_gid)
os.utime(d_path, (s.st_atime, s.st_mtime))
elif S_ISLNK(s.st_mode):
os.symlink(os.readlink(s_path), d_path)
elif S_ISFIFO(s.st_mode):
os.mkfifo(d_path, S_IMODE(s.st_mode))
os.chown(d_path, s.st_uid, s.st_gid)
os.utime(d_path, (s.st_atime, s.st_mtime))
elif S_ISSOCK(s.st_mode):
sock = socket.socket(socket.AF_UNIX)
# os.mknod() doesn't work correctly in 64-bit processes.
run_bit = struct.calcsize("P") * 8
# The S11 FCS version of Python doesn't have os.mknod(), so we fall
# back to sock.bind(); however, sock.bind() has a path length
# limitation that we can hit when archiving the test suite.
# E1101 Module '{0}' has no '{1}' member
# pylint: disable=E1101
if hasattr(os, "mknod") and run_bit == 32:
os.mknod(d_path, s.st_mode, s.st_dev)
else:
try:
sock.bind(d_path)
sock.close()
except socket.error as _e:
# Store original exception so that the
# real cause of failure can be raised if
# this fails.
problem = sys.exc_info()
continue
os.chown(d_path, s.st_uid, s.st_gid)
os.utime(d_path, (s.st_atime, s.st_mtime))
elif S_ISCHR(s.st_mode) or S_ISBLK(s.st_mode):
# the s11 fcs version of python doesn't have os.mknod()
# E1101 Module '{0}' has no '{1}' member
# pylint: disable=E1101
if hasattr(os, "mknod"):
os.mknod(d_path, s.st_mode, s.st_dev)
os.chown(d_path, s.st_uid, s.st_gid)
os.utime(d_path, (s.st_atime, s.st_mtime))
elif S_IFMT(s.st_mode) == 0xd000: # doors
pass
elif S_IFMT(s.st_mode) == 0xe000: # event ports
pass
else:
print("unknown file type:", oct(S_IFMT(s.st_mode)))
os.chmod(dst, S_IMODE(src_stat.st_mode))
os.chown(dst, src_stat.st_uid, src_stat.st_gid)
os.utime(dst, (src_stat.st_atime, src_stat.st_mtime))
if problem:
six.reraise(problem[0], problem[1], problem[2])
def move(src, dst):
"""Rewrite of shutil.move() that uses our copy of copytree()."""
# If dst is a directory, then we try to move src into it.
if os.path.isdir(dst):
dst = os.path.join(dst,
os.path.basename(src.rstrip(os.path.sep)))
try:
os.rename(src, dst)
except EnvironmentError as e:
s = os.lstat(src)
if e.errno == errno.EXDEV:
if S_ISDIR(s.st_mode):
copytree(src, dst)
shutil.rmtree(src)
else:
shutil.copyfile(src, dst)
os.chmod(dst, S_IMODE(s.st_mode))
os.chown(dst, s.st_uid, s.st_gid)
os.utime(dst, (s.st_atime, s.st_mtime))
os.unlink(src)
elif e.errno == errno.EINVAL and S_ISDIR(s.st_mode):
raise shutil.Error("Cannot move a directory '{0}' "
"into itself '{1}'.".format(src, dst))
elif e.errno == errno.ENOTDIR and S_ISDIR(s.st_mode):
raise shutil.Error("Destination path '{0}' already "
"exists".format(dst))
else:
raise
def expanddirs(dirs):
"""given a set of directories, return expanded set that includes
all components"""
out = set()
for d in dirs:
p = d
while p != "":
out.add(p)
p = os.path.dirname(p)
return out
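# Illustrative usage sketch: a single path expands to itself plus each of
# its parent components, e.g.
#   sorted(expanddirs(["usr/lib/pkg"])) -> ["usr", "usr/lib", "usr/lib/pkg"]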
def url_affix_trailing_slash(u):
"""if 'u' donesn't have a trailing '/', append one."""
if u[-1] != '/':
u = u + '/'
return u
_client_version = "pkg/{0} ({1} {2}; {3} {4}; {{0}}; {{1}})".format(
VERSION, portable.util.get_canonical_os_name(), platform.machine(),
portable.util.get_os_release(), platform.version())
def user_agent_str(img, client_name):
"""Return a string that can use to identify the client."""
if not img or img.type is None:
imgtype = IMG_NONE
else:
imgtype = img.type
useragent = _client_version.format(img_type_names[imgtype], client_name)
return useragent
# Valid hostname can be : HOSTNAME or IPv4 addr or IPV6 addr
_hostname_re = re.compile(r"""^(?:[a-zA-Z0-9\-]+[a-zA-Z0-9\-\.]*
|(?:\d{1,3}\.){3}\d{1,3}
|\[([a-fA-F0-9\.]*:){,7}[a-fA-F0-9\.]+\])$""", re.X)
_invalid_host_chars = re.compile(r".*[^a-zA-Z0-9\-\.:\[\]]+")
_valid_proto = ["file", "http", "https"]
def valid_pub_prefix(prefix):
"""Verify that the publisher prefix only contains valid characters."""
if not prefix:
return False
# This is a workaround for the hostname_re being slow when
# it comes to finding invalid characters in the prefix string.
if _invalid_host_chars.match(prefix):
# prefix bad chars
return False
if _hostname_re.match(prefix):
return True
return False
def valid_pub_url(url, proxy=False):
"""Verify that the publisher URL contains only valid characters.
If 'proxy' is set to True, some checks are relaxed."""
if not url:
return False
# First split the URL and check if the scheme is one we support
o = urlsplit(url)
if not o[0] in _valid_proto:
return False
if o[0] == "file":
path = urlparse(url, "file", allow_fragments=0)[2]
path = url2pathname(path)
if not os.path.abspath(path):
return False
# No further validation to be done.
return True
# Next verify that the network location is valid
if six.PY3:
host = urllib.parse.splitport(o[1])[0]
else:
host = urllib.splitport(o[1])[0]
if proxy:
# We may have authentication details in the proxy URI, which
# we must ignore when checking for hostname validity.
host_parts = host.split("@")
if len(host_parts) == 2:
host = host_parts[1]
if not host or _invalid_host_chars.match(host):
return False
if _hostname_re.match(host):
return True
return False
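# Illustrative usage sketch (the host names below are hypothetical):
#   valid_pub_prefix("solaris")                -> True
#   valid_pub_prefix("bad prefix!")            -> False
#   valid_pub_url("http://pkg.example.com/")   -> True
#   valid_pub_url("ftp://pkg.example.com/")    -> False (unsupported scheme)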
def gunzip_from_stream(gz, outfile, hash_func=None, hash_funcs=None,
ignore_hash=False):
"""Decompress a gzipped input stream into an output stream.
The argument 'gz' is an input stream of a gzipped file and 'outfile'
is an output stream. gunzip_from_stream() decompresses data from
'gz' and writes it to 'outfile', and returns the hexadecimal SHA sum
of that data using the hash_func supplied.
'hash_funcs', if supplied, is a list of hash functions which we should
use to compute the hash. If 'hash_funcs' is supplied, a list of
hexadecimal digests computed using those functions is returned. The
returned list is in the same order as 'hash_funcs'.
If 'ignore_hash' is True, we do not compute a hash when decompressing
the content and do not return any value.
"""
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16
if not (hash_func or hash_funcs) and not ignore_hash:
raise ValueError("no hash functions for gunzip_from_stream")
# Read the header
magic = gz.read(2)
if magic != b"\037\213":
raise zlib.error("Not a gzipped file")
method = ord(gz.read(1))
if method != 8:
raise zlib.error("Unknown compression method")
flag = ord(gz.read(1))
gz.read(6) # Discard modtime, extraflag, os
# Discard an extra field
if flag & FEXTRA:
xlen = ord(gz.read(1))
xlen = xlen + 256 * ord(gz.read(1))
gz.read(xlen)
# Discard a null-terminated filename
if flag & FNAME:
while True:
s = gz.read(1)
if not s or s == b"\000":
break
# Discard a null-terminated comment
if flag & FCOMMENT:
while True:
s = gz.read(1)
if not s or s == b"\000":
break
# Discard a 16-bit CRC
if flag & FHCRC:
gz.read(2)
if ignore_hash:
pass
elif hash_funcs:
shasums = []
for f in hash_funcs:
shasums.append(digest.HASH_ALGS[f]())
else:
shasum = hash_func()
dcobj = zlib.decompressobj(-zlib.MAX_WBITS)
while True:
buf = gz.read(64 * 1024)
if buf == b"":
ubuf = dcobj.flush()
if ignore_hash:
pass
elif hash_funcs:
for sha in shasums:
sha.update(ubuf)
else:
shasum.update(ubuf) # pylint: disable=E1101
outfile.write(ubuf)
break
ubuf = dcobj.decompress(buf)
if ignore_hash:
pass
elif hash_funcs:
for sha in shasums:
sha.update(ubuf)
else:
shasum.update(ubuf) # pylint: disable=E1101
outfile.write(ubuf)
if ignore_hash:
return
elif hash_funcs:
hexdigests = []
for sha in shasums:
hexdigests.append(sha.hexdigest())
return hexdigests
return shasum.hexdigest()
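# Illustrative usage sketch: decompress an already-opened gzip stream while
# computing a single digest.  hashlib is used here purely for illustration;
# callers in the packaging system normally pass factories from pkg.digest,
# and the paths below are hypothetical.
#   import hashlib
#   with open("/tmp/example.gz", "rb") as gz, open("/tmp/example", "wb") as out:
#       sha1_hex = gunzip_from_stream(gz, out, hash_func=hashlib.sha1)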
class PipeError(Exception):
""" Pipe exception. """
def __init__(self, args=None):
Exception.__init__(self)
self._args = args
def msg(*text):
""" Emit a message. """
try:
print(' '.join([str(l) for l in text]))
except IOError as e:
if e.errno == errno.EPIPE:
raise PipeError(e)
raise
def emsg(*text):
""" Emit a message to sys.stderr. """
try:
print(' '.join([str(l) for l in text]), file=sys.stderr)
except IOError as e:
if e.errno == errno.EPIPE:
raise PipeError(e)
raise
def setlocale(category, loc=None, printer=None):
"""Wraps locale.setlocale(), falling back to the C locale if the desired
locale is broken or unavailable. The 'printer' parameter should be a
function which takes a string and displays it. If 'None' (the default),
setlocale() will print the message to stderr."""
if printer is None:
printer = emsg
try:
locale.setlocale(category, loc)
# Because of Python bug 813449, getdefaultlocale may fail
# with a ValueError even if setlocale succeeds. So we call
# it here to prevent having this error raised if it is
# called later by other non-pkg(7) code.
locale.getdefaultlocale()
except (locale.Error, ValueError):
try:
dl = " '{0}.{1}'".format(*locale.getdefaultlocale())
except ValueError:
dl = ""
printer("Unable to set locale{0}; locale package may be broken "
"or\nnot installed. Reverting to C locale.".format(dl))
locale.setlocale(category, "C")
def N_(message):
"""Return its argument; used to mark strings for localization when
their use is delayed by the program."""
return message
def bytes_to_str(nbytes, fmt="{num:>.2f} {unit}"):
"""Returns a human-formatted string representing the number of bytes
in the largest unit possible.
If provided, 'fmt' should be a string which can be formatted
with a dictionary containing a float 'num' and strings 'unit' and
'shortunit'. The default format prints, for example, '3.23 MB' """
units = [
(_("B"), _("B"), 2**10),
(_("kB"), _("k"), 2**20),
(_("MB"), _("M"), 2**30),
(_("GB"), _("G"), 2**40),
(_("TB"), _("T"), 2**50),
(_("PB"), _("P"), 2**60),
(_("EB"), _("E"), 2**70)
]
for uom, shortuom, limit in units:
# pylint is picky about this message:
# old-division; pylint: disable=W1619
if uom != _("EB") and nbytes >= limit:
# Move on to the next largest unit of measure unless this is
# the largest unit or the byte count falls within the current
# unit of measure's range.
continue
if "{num:d}" in fmt:
return fmt.format(
num=int(nbytes / (limit // 2**10)),
unit=uom,
shortunit=shortuom
)
else:
return fmt.format(
num=round(nbytes / (limit // 2**10), 2),
unit=uom,
shortunit=shortuom
)
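# Illustrative usage sketch (assuming an untranslated locale):
#   bytes_to_str(3 * 2**20)                           -> "3.00 MB"
#   bytes_to_str(3 * 2**20, fmt="{num:d}{shortunit}") -> "3M"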
def get_rel_path(request, uri, pub=None):
"""Calculate the depth of the current request path relative to our
base uri. path_info always ends with a '/' -- so ignore it when
calculating depth."""
rpath = request.path_info
if pub:
rpath = rpath.replace("/{0}/".format(pub), "/")
depth = rpath.count("/") - 1
return ("../" * depth) + uri
def get_pkg_otw_size(action):
"""Takes a file action and returns the over-the-wire size of
a package as an integer. The OTW size is the compressed size,
pkg.csize. If that value isn't available, it returns pkg.size.
If pkg.size isn't available, return zero."""
size = action.attrs.get("pkg.csize")
if size is None:
size = action.attrs.get("pkg.size", 0)
return int(size)
def get_data_digest(data, length=None, return_content=False,
hash_attrs=None, hash_algs=None, hash_func=None):
"""Returns a tuple of ({hash attribute name: hash value}, content)
or a tuple of (hash value, content) if hash_attrs has only one element.
'data' should be a file-like object or a pathname to a file.
'length' should be an integer value representing the size of
the contents of data in bytes.
'return_content' is a boolean value indicating whether the
second tuple value should contain the content of 'data' or
if the content should be discarded during processing.
'hash_attrs' is a list of keys describing the hashes we want to compute
for this data. The keys must be present in 'hash_algs', a dictionary
mapping keys to the factory methods that are used to create objects
to compute them. The factory method must take no parameters, and must
return an object that has 'update()' and 'hexdigest()' methods.
'hash_func' is provided as a convenience to simply hash the data with
a single hash algorithm. The value of 'hash_func' should be the factory
method used to compute that hash value, as described in the previous
paragraph.
"""
bufsz = 128 * 1024
closefobj = False
if isinstance(data, six.string_types):
f = open(data, "rb", bufsz)
closefobj = True
else:
f = data
if length is None:
length = os.stat(data).st_size
# Setup our results dictionary so that each attribute maps to a
# new hash object.
if hash_func:
hsh = hash_func()
else:
if hash_algs is None or hash_attrs is None:
assert False, "get_data_digest without hash_attrs/algs"
hash_results = {}
for attr in hash_attrs:
# "pkg.content-hash" is provided by default and doesn't
# indicate the hash_alg to be used, so when we want to
# calculate the content hash, we'll specify the
# hash_attrs explicitly, such as "file:sha512t_256".
if attr != "pkg.content-hash":
hash_results[attr] = hash_algs[attr]()
# Read the data in chunks and compute the SHA hashes as the data comes
# in. A large read on some platforms (e.g. Windows XP) may fail.
content = BytesIO()
while length > 0:
data = f.read(min(bufsz, length))
if return_content:
content.write(data)
if hash_func:
hsh.update(data)
else:
# update each hash with this data
for attr in hash_attrs:
if attr != "pkg.content-hash":
hash_results[attr].update(
data) # pylint: disable=E1101
l = len(data)
if l == 0:
break
length -= l
content.seek(0)
if closefobj:
f.close()
if hash_func:
return hsh.hexdigest(), content.read()
# The returned dictionary can now be populated with the hexdigests
# instead of the hash objects themselves.
for attr in hash_results:
hash_results[attr] = hash_results[attr].hexdigest()
return hash_results, content.read()
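# Illustrative usage sketch: hash a file with a single algorithm and discard
# the content.  hashlib is used purely for illustration (callers normally
# pass factories from pkg.digest) and the path below is hypothetical.
#   import hashlib
#   hexdigest, _ = get_data_digest("/etc/motd", hash_func=hashlib.sha1)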
class _GZWriteWrapper(object):
"""Used by compute_compressed_attrs to calculate data size and compute
hashes as the data is written instead of having to read the written data
again later."""
def __init__(self, path, chashes):
"""If path is None, the data will be discarded immediately after
computing size and hashes."""
if path:
self._ofile = open(path, "wb")
else:
self._ofile = None
self._chashes = chashes
self._size = 0
def close(self):
"""Close the file."""
if self._ofile:
self._ofile.close()
self._ofile = None
def flush(self):
"""Flush the file."""
if self._ofile:
self._ofile.flush()
@property
def size(self):
"""Return the size of the file."""
return self._size
def write(self, data):
"""Write data to the file and compute the hashes of the data."""
if self._ofile:
self._ofile.write(data)
self._size += len(data)
for chash_attr in self._chashes:
self._chashes[chash_attr].update(
data) # pylint: disable=E1101
def compute_compressed_attrs(fname, file_path=None, data=None, size=None,
compress_dir=None, bufsz=64*1024, chash_attrs=None, chash_algs=None):
"""Returns the size and one or more hashes of the compressed data. If
the file located at file_path doesn't exist or isn't gzipped, it creates
a file in compress_dir named fname. If compress_dir is None, the
attributes are calculated but no data will be written.
'chash_attrs' is a list of the chash attributes we should compute, with
'chash_algs' being a dictionary that maps the attribute names to the
algorithms used to compute them.
"""
if chash_attrs is None:
chash_attrs = digest.DEFAULT_CHASH_ATTRS
if chash_algs is None:
chash_algs = digest.CHASH_ALGS
chashes = {}
for chash_attr in chash_attrs:
# "pkg.content-hash" is provided by default and doesn't
# indicate the hash_alg to be used, so when we want to
# calculate the content hash, we'll specify the
# hash_attrs explicitly, such as "gzip:sha512t_256".
if chash_attr == "pkg.content-hash":
chashes[chash_attr] = chash_algs["{0}:{1}".format(
digest.EXTRACT_GZIP, digest.PREFERRED_HASH)]()
else:
chashes[chash_attr] = chash_algs[chash_attr]()
#
# This check prevents compressing a file which is already compressed.
# This takes CPU load off the depot on large imports of mostly-the-same
# stuff. And in general it saves disk bandwidth, and on ZFS in
# particular it saves us space in differential snapshots. We also need
# to check that the destination is in the same compression format as
# the source, as we must have properly formed files for chash/csize
# properties to work right.
#
fileneeded = True
if file_path:
if PkgGzipFile.test_is_pkggzipfile(file_path):
fileneeded = False
opath = file_path
if fileneeded:
if compress_dir:
opath = os.path.join(compress_dir, fname)
else:
opath = None
fobj = _GZWriteWrapper(opath, chashes)
ofile = PkgGzipFile(mode="wb", fileobj=fobj)
nbuf = size // bufsz
for n in range(0, nbuf):
l = n * bufsz
h = (n + 1) * bufsz
ofile.write(data[l:h])
m = nbuf * bufsz
ofile.write(data[m:])
ofile.close()
fobj.close()
csize = str(fobj.size)
for attr in chashes:
if attr == "pkg.content-hash":
chashes[attr] = "{0}:{1}:{2}".format(
digest.EXTRACT_GZIP, digest.PREFERRED_HASH,
chashes[attr].hexdigest())
else:
chashes[attr] = chashes[attr].hexdigest()
return csize, chashes
# Compute the SHA hash of the compressed file. In order for this to
# work correctly, we have to use the PkgGzipFile class. It omits
# filename and timestamp information from the gzip header, allowing us
# to generate deterministic hashes for different files with identical
# content.
fs = os.stat(opath)
csize = str(fs.st_size)
with open(opath, "rb") as cfile:
while True:
cdata = cfile.read(bufsz)
# cdata is bytes
if cdata == b"":
break
for chash_attr in chashes:
chashes[chash_attr].update(
cdata) # pylint: disable=E1101
# The returned dictionary can now be populated with the hexdigests
# instead of the hash objects themselves.
for attr in chashes:
if attr == "pkg.content-hash":
chashes[attr] = "{0}:{1}:{2}".format(
digest.EXTRACT_GZIP, digest.PREFERRED_HASH,
chashes[attr].hexdigest())
else:
chashes[attr] = chashes[attr].hexdigest()
return csize, chashes
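# Illustrative usage sketch: compute the compressed size and hashes of an
# in-memory payload without writing anything to disk (compress_dir=None).
# The payload is hypothetical and 'fname' is only used when a file is
# actually written.
#   payload = b"example file contents"
#   csize, chashes = compute_compressed_attrs("unused-name", data=payload,
#       size=len(payload), compress_dir=None)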
class ProcFS(object):
"""This class is used as an interface to procfs."""
# Detect whether python is running in 32-bit or 64-bit
# environment based on pointer size.
_running_bit = struct.calcsize("P") * 8
actual_format = {32: {
"long": "l",
"uintptr_t": "I",
"ulong": "L"
},
64: {
"long": "q",
"uintptr_t": "Q",
"ulong": "Q"
}}
_ctype_formats = {
# This dictionary maps basic c types into python format characters
# that can be used with struct.unpack(). The format of this
# dictionary is:
# <ctype>: (<repeat count>, <format char>)
# basic c types (repeat count should always be 1)
# char[] is used to encode character arrays
"char": (1, "c"),
"char[]": (1, "s"),
"int": (1, "i"),
"long": (1, actual_format[_running_bit]["long"]),
"uintptr_t": (1, actual_format[_running_bit]["uintptr_t"]),
"ushort_t": (1, "H"),
# other simple types (repeat count should always be 1)
"ctid_t": (1, "i"), # ctid_t -> id_t -> int
# dev_t -> ulong_t
"dev_t": (1, actual_format[_running_bit]["ulong"]),
"gid_t": (1, "I"), # gid_t -> uid_t -> uint_t
"pid_t": (1, "i"), # pid_t -> int
"poolid_t": (1, "i"), # poolid_t -> id_t -> int
"projid_t": (1, "i"), # projid_t -> id_t -> int
# size_t -> ulong_t
"size_t": (1, actual_format[_running_bit]["ulong"]),
"taskid_t": (1, "i"), # taskid_t -> id_t -> int
# time_t -> long
"time_t": (1, actual_format[_running_bit]["long"]),
"uid_t": (1, "I"), # uid_t -> uint_t
"zoneid_t": (1, "i"), # zoneid_t -> id_t -> int
"id_t": (1, "i"), # id_t -> int
# structures must be represented as character arrays
# sizeof (timestruc_t) = 8 in 32-bit process, and = 16 in 64-bit.
"timestruc_t": (_running_bit // 4, "s"),
}
_timestruct_desc = [
# this list describes a timestruc_t structure
# the entry format is (<ctype>, <repeat count>, <name>)
("time_t", 1, "tv_sec"),
("long", 1, "tv_nsec"),
]
_psinfo_desc = [
# this list describes a psinfo_t structure
# the entry format is: (<ctype>, <repeat count>, <name>)
("int", 1, "pr_flag"),
("int", 1, "pr_nlwp"),
("pid_t", 1, "pr_pid"),
("pid_t", 1, "pr_ppid"),
("pid_t", 1, "pr_pgid"),
("pid_t", 1, "pr_sid"),
("uid_t", 1, "pr_uid"),
("uid_t", 1, "pr_euid"),
("gid_t", 1, "pr_gid"),
("gid_t", 1, "pr_egid"),
("uintptr_t", 1, "pr_addr"),
("size_t", 1, "pr_size"),
("size_t", 1, "pr_rssize"),
("size_t", 1, "pr_pad1"),
("dev_t", 1, "pr_ttydev"),
("ushort_t", 1, "pr_pctcpu"),
("ushort_t", 1, "pr_pctmem"),
("timestruc_t", 1, "pr_start"),
("timestruc_t", 1, "pr_time"),
("timestruc_t", 1, "pr_ctime"),
("char[]", 16, "pr_fname"),
("char[]", 80, "pr_psargs"),
("int", 1, "pr_wstat"),
("int", 1, "pr_argc"),
("uintptr_t", 1, "pr_argv"),
("uintptr_t", 1, "pr_envp"),
("char", 1, "pr_dmodel"),
("char[]", 3, "pr_pad2"),
("taskid_t", 1, "pr_taskid"),
("projid_t", 1, "pr_projid"),
("int", 1, "pr_nzomb"),
("poolid_t", 1, "pr_poolid"),
("zoneid_t", 1, "pr_zoneid"),
("id_t", 1, "pr_contract"),
("int", 1, "pr_filler"),
]
# For a 64-bit process, the alignment is off by 4 bytes after the
# pr_pctmem field, so add an additional pad here.
if _running_bit == 64:
_psinfo_desc = _psinfo_desc[0:17] + [("int", 1, "dum_pad")] + \
_psinfo_desc[17:]
_struct_descriptions = {
# this list contains all the known structure description lists
# the entry format is: <structure name>: \
# [ <description>, <format string>, <namedtuple> ]
#
# Note that <format string> and <namedtuple> should be assigned
# None in this table, and then they will get pre-populated
# automatically when this class is instantiated
#
"psinfo_t": [_psinfo_desc, None, None],
"timestruc_t": [_timestruct_desc, None, None],
}
# fill in <format string> and <namedtuple> in _struct_descriptions
for struct_name, v in six.iteritems(_struct_descriptions):
desc = v[0]
# update _struct_descriptions with a format string
v[1] = ""
for ctype, count1, name in desc:
count2, fmt_char = _ctype_formats[ctype]
v[1] = v[1] + str(count1 * count2) + fmt_char
# update _struct_descriptions with a named tuple
v[2] = collections.namedtuple(struct_name,
[ i[2] for i in desc ])
@staticmethod
def _struct_unpack(data, name):
"""Unpack 'data' using struct.unpack(). 'name' is the name of
the data we're unpacking and is used to lookup a description
of the data (which in turn is used to build a format string to
decode the data)."""
# lookup the description of the data to unpack
desc, fmt, nt = ProcFS._struct_descriptions[name]
# unpack the data into a list
rv = list(struct.unpack(fmt, data))
# check for any nested data that needs unpacking
for index, v in enumerate(desc):
ctype = v[0]
if ctype not in ProcFS._struct_descriptions:
continue
rv[index] = ProcFS._struct_unpack(rv[index], ctype)
# return the data in a named tuple
return nt(*rv)
@staticmethod
def psinfo():
"""Read the psinfo file and return its contents."""
# This works only on Solaris, in 32-bit or 64-bit mode. It may
# not work on older or newer versions than 5.11. Ideally, we
# would use libproc, or check sbrk(0), but this is expedient.
# In most cases (there's a small chance the file will decode,
# but incorrectly), failure will raise an exception, and we'll
# fail safe.
psinfo_size = 232
if ProcFS._running_bit == 64:
psinfo_size = 288
try:
with open("/proc/self/psinfo", "rb") as f:
psinfo_data = f.read(psinfo_size)
# Catch "Exception"; pylint: disable=W0703
except Exception:
return None
# make sure we got the expected amount of data, otherwise
# unpacking it will fail.
if len(psinfo_data) != psinfo_size:
return None
return ProcFS._struct_unpack(psinfo_data, "psinfo_t")
def __getvmusage():
"""Return the amount of virtual memory in bytes currently in use."""
psinfo = ProcFS.psinfo()
if psinfo is None:
return None
return psinfo.pr_size * 1024
def _prstart():
"""Return the process start time expressed as a floating point number
in seconds since the epoch, in UTC."""
psinfo = ProcFS.psinfo()
if psinfo is None:
return 0.0
return psinfo.pr_start.tv_sec + (float(psinfo.pr_start.tv_nsec) / 1e9)
def out_of_memory():
"""Return an out of memory message, for use in a MemoryError handler."""
# figure out how much memory we're using (note that we could run out
# of memory while doing this, so check for that).
vsz = None
try:
vmusage = __getvmusage()
if vmusage is not None:
vsz = bytes_to_str(vmusage, fmt="{num:.0f}{unit}")
except (MemoryError, EnvironmentError) as __e:
if isinstance(__e, EnvironmentError) and \
__e.errno != errno.ENOMEM:
raise
if vsz is not None:
error = """\
There is not enough memory to complete the requested operation. At least
{vsz} of virtual memory was in use by this command before it ran out of memory.
You must add more memory (swap or physical) or allow the system to access more
existing memory, or quit other programs that may be consuming memory, and try
the operation again."""
else:
error = """\
There is not enough memory to complete the requested operation. You must
add more memory (swap or physical) or allow the system to access more existing
memory, or quit other programs that may be consuming memory, and try the
operation again."""
return _(error).format(**locals())
# EmptyI for argument defaults
EmptyI = tuple()
# ImmutableDict for argument defaults
class ImmutableDict(dict):
# Missing docstring; pylint: disable=C0111
# Unused argument; pylint: disable=W0613
def __init__(self, default=EmptyI):
dict.__init__(self, default)
def __setitem__(self, item, value):
self.__oops()
def __delitem__(self, item):
self.__oops()
def pop(self, item, default=None):
self.__oops()
def popitem(self):
self.__oops()
def setdefault(self, item, default=None):
self.__oops()
def update(self, d):
self.__oops()
def copy(self):
return ImmutableDict()
def clear(self):
self.__oops()
@staticmethod
def __oops():
raise TypeError("Item assignment to ImmutableDict")
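# Illustrative usage sketch:
#   d = ImmutableDict({"a": 1})
#   d["a"]        -> 1
#   d["b"] = 2    -> raises TypeError("Item assignment to ImmutableDict")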
# A way to have a dictionary be a property
class DictProperty(object):
# Missing docstring; pylint: disable=C0111
class __InternalProxy(object):
def __init__(self, obj, fget, fset, fdel, iteritems, keys,
values, iterator, fgetdefault, fsetdefault, update, pop):
self.__obj = obj
self.__fget = fget
self.__fset = fset
self.__fdel = fdel
self.__iteritems = iteritems
self.__keys = keys
self.__values = values
self.__iter = iterator
self.__fgetdefault = fgetdefault
self.__fsetdefault = fsetdefault
self.__update = update
self.__pop = pop
def __getitem__(self, key):
if self.__fget is None:
raise AttributeError("unreadable attribute")
return self.__fget(self.__obj, key)
def __setitem__(self, key, value):
if self.__fset is None:
raise AttributeError("can't set attribute")
self.__fset(self.__obj, key, value)
def __delitem__(self, key):
if self.__fdel is None:
raise AttributeError("can't delete attribute")
self.__fdel(self.__obj, key)
def iteritems(self):
if self.__iteritems is None:
raise AttributeError("can't iterate over items")
return self.__iteritems(self.__obj)
# for Python 3 compatibility
def items(self):
return self.iteritems()
def keys(self):
if self.__keys is None:
raise AttributeError("can't iterate over keys")
return self.__keys(self.__obj)
def values(self):
if self.__values is None:
raise AttributeError("can't iterate over "
"values")
return self.__values(self.__obj)
def get(self, key, default=None):
if self.__fgetdefault is None:
raise AttributeError("can't use get")
return self.__fgetdefault(self.__obj, key, default)
def setdefault(self, key, default=None):
if self.__fsetdefault is None:
raise AttributeError("can't use setdefault")
return self.__fsetdefault(self.__obj, key, default)
def update(self, d):
if self.__update is None:
raise AttributeError("can't use update")
return self.__update(self.__obj, d)
def pop(self, d, default):
if self.__pop is None:
raise AttributeError("can't use pop")
return self.__pop(self.__obj, d, default)
def __iter__(self):
if self.__iter is None:
raise AttributeError("can't iterate")
return self.__iter(self.__obj)
def __init__(self, fget=None, fset=None, fdel=None, iteritems=None,
keys=None, values=None, iterator=None, doc=None, fgetdefault=None,
fsetdefault=None, update=None, pop=None):
self.__fget = fget
self.__fset = fset
self.__fdel = fdel
self.__iteritems = iteritems
self.__doc__ = doc
self.__keys = keys
self.__values = values
self.__iter = iterator
self.__fgetdefault = fgetdefault
self.__fsetdefault = fsetdefault
self.__update = update
self.__pop = pop
def __get__(self, obj, objtype=None):
# Unused argument; pylint: disable=W0613
if obj is None:
return self
return self.__InternalProxy(obj, self.__fget, self.__fset,
self.__fdel, self.__iteritems, self.__keys, self.__values,
self.__iter, self.__fgetdefault, self.__fsetdefault,
self.__update, self.__pop)
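# Illustrative usage sketch for DictProperty; the class and method names
# below are hypothetical.  A private dictionary is exposed as a dict-like
# attribute whose item access is routed through accessor methods.
#   class Config(object):
#       def __init__(self):
#           self._props = {}
#       def _get_prop(self, key):
#           return self._props[key]
#       def _set_prop(self, key, value):
#           self._props[key] = value
#       properties = DictProperty(_get_prop, _set_prop,
#           doc="Dictionary of configuration properties.")
#
#   cfg = Config()
#   cfg.properties["publisher"] = "solaris"   # routed through _set_prop
#   cfg.properties["publisher"]               # routed through _get_prop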
def build_cert(path, uri=None, pub=None):
"""Take the file given in path, open it, and use it to create
an X509 certificate object.
'uri' is an optional value indicating the uri associated with or that
requires the certificate for access.
'pub' is an optional string value containing the name (prefix) of a
related publisher."""
try:
cf = open(path, "rb")
certdata = cf.read()
cf.close()
except EnvironmentError as e:
if e.errno == errno.ENOENT:
raise api_errors.NoSuchCertificate(path, uri=uri,
publisher=pub)
if e.errno == errno.EACCES:
raise api_errors.PermissionsException(e.filename)
if e.errno == errno.EROFS:
raise api_errors.ReadOnlyFileSystemException(e.filename)
raise
try:
return osc.load_certificate(osc.FILETYPE_PEM, certdata)
except osc.Error as e:
# OpenSSL.crypto.Error
raise api_errors.InvalidCertificate(path, uri=uri,
publisher=pub)
def validate_ssl_cert(ssl_cert, prefix=None, uri=None):
"""Validates the indicated certificate and returns a pyOpenSSL object
representing it if it is valid."""
cert = build_cert(ssl_cert, uri=uri, pub=prefix)
if cert.has_expired():
raise api_errors.ExpiredCertificate(ssl_cert, uri=uri,
publisher=prefix)
now = datetime.datetime.utcnow()
nb = cert.get_notBefore()
# strptime's first argument must be str
t = time.strptime(force_str(nb), "%Y%m%d%H%M%SZ")
nbdt = datetime.datetime.utcfromtimestamp(
calendar.timegm(t))
# PyOpenSSL's has_expired() doesn't validate the notBefore
# time on the certificate. Don't ask me why.
if nbdt > now:
raise api_errors.NotYetValidCertificate(ssl_cert, uri=uri,
publisher=prefix)
na = cert.get_notAfter()
t = time.strptime(force_str(na), "%Y%m%d%H%M%SZ")
nadt = datetime.datetime.utcfromtimestamp(
calendar.timegm(t))
diff = nadt - now
if diff <= MIN_WARN_DAYS:
raise api_errors.ExpiringCertificate(ssl_cert, uri=uri,
publisher=prefix, days=diff.days)
return cert
def binary_to_hex(s):
"""Converts a string of bytes to a hexadecimal representation.
"""
return force_str(hexlify(s))
def hex_to_binary(s):
"""Converts a string of hex digits to the binary representation.
"""
return unhexlify(s)
def config_temp_root():
"""Examine the environment. If the environment has set TMPDIR, TEMP,
or TMP, return None. This tells tempfile to use the environment
settings when creating temporary files/directories. Otherwise,
return a path that the caller should pass to tempfile instead."""
# In Python's tempfile module, the default temp directory
# includes some paths that are suboptimal for holding large numbers
# of files. If the user hasn't set TMPDIR, TEMP, or TMP in the
# environment, override the default directory for creating a tempfile.
tmp_envs = [ "TMPDIR", "TEMP", "TMP" ]
for ev in tmp_envs:
env_val = os.getenv(ev)
if env_val:
return None
return DEFAULT_TEMP_PATH
def get_temp_root_path():
"""Return the directory path where the temporary directories or
files should be created. If the environment has set TMPDIR
or TEMP or TMP then return the corresponding value else return the
default value."""
temp_env = [ "TMPDIR", "TEMP", "TMP" ]
for env in temp_env:
env_val = os.getenv(env)
if env_val:
return env_val
return DEFAULT_TEMP_PATH
def parse_uri(uri, cwd=None):
"""Parse the repository location provided and attempt to transform it
into a valid repository URI.
'cwd' is the working directory to use to turn paths into an absolute
path. If not provided, the current working directory is used.
"""
if uri.find("://") == -1 and not uri.startswith("file:/"):
# Convert the file path to a URI.
if not cwd:
uri = os.path.abspath(uri)
elif not os.path.isabs(uri):
uri = os.path.normpath(os.path.join(cwd, uri))
uri = urlunparse(("file", "",
pathname2url(uri), "", "", ""))
scheme, netloc, path, params, query, fragment = \
urlparse(uri, "file", allow_fragments=0)
scheme = scheme.lower()
if scheme == "file":
# During urlunparsing below, ensure that the path starts with
# only one '/' character, if any are present.
if path.startswith("/"):
path = "/" + path.lstrip("/")
# Rebuild the URI with the sanitized components.
return urlunparse((scheme, netloc, path, params,
query, fragment))
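# Illustrative usage sketch; the locations below are hypothetical:
#   parse_uri("/export/repo")               -> a "file:" URI for /export/repo
#   parse_uri("repo", cwd="/export")        -> a "file:" URI for /export/repo
#   parse_uri("http://pkg.example.com/solaris/")
#                                           -> "http://pkg.example.com/solaris/"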
def makedirs(pathname):
"""Create a directory at the specified location if it does not
already exist (including any parent directories) re-raising any
unexpected exceptions as ApiExceptions.
"""
try:
os.makedirs(pathname, PKG_DIR_MODE)
except EnvironmentError as e:
if e.filename == pathname and (e.errno == errno.EEXIST or
os.path.exists(e.filename)):
return
elif e.errno == errno.EACCES:
raise api_errors.PermissionsException(
e.filename)
elif e.errno == errno.EROFS:
raise api_errors.ReadOnlyFileSystemException(
e.filename)
elif e.errno != errno.EEXIST or e.filename != pathname:
raise
class DummyLock(object):
"""This has the same external interface as threading.Lock,
but performs no locking. This is a placeholder object for situations
where we want to be able to do locking, but don't always need a
lock object present. The object has a 'held' value that is used
for _is_owned(). This is informational only and doesn't actually
provide mutual exclusion in any way whatsoever."""
# Missing docstring; pylint: disable=C0111
def __init__(self):
self.held = False
def acquire(self, blocking=1):
# Unused argument; pylint: disable=W0613
self.held = True
return True
def release(self):
self.held = False
return
def _is_owned(self):
return self.held
@property
def locked(self):
return self.held
class Singleton(type):
"""Set __metaclass__ to Singleton to create a singleton.
See http://en.wikipedia.org/wiki/Singleton_pattern """
def __init__(cls, name, bases, dictionary):
super(Singleton, cls).__init__(name, bases, dictionary)
cls.instance = None
def __call__(cls, *args, **kw):
if cls.instance is None:
cls.instance = super(Singleton, cls).__call__(*args,
**kw)
return cls.instance
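# Illustrative usage sketch: six.add_metaclass() is used so the example works
# under both Python 2 and 3; the class below is hypothetical.
#   @six.add_metaclass(Singleton)
#   class Config(object):
#       def __init__(self):
#           self.values = {}
#
#   Config() is Config()   -> True; every call returns the same instance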
EmptyDict = ImmutableDict()
# Setting the python file buffer size to 128k gives substantial performance
# gains on certain files.
PKG_FILE_BUFSIZ = 128 * 1024
PKG_FILE_MODE = S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH
PKG_DIR_MODE = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)
PKG_RO_FILE_MODE = S_IRUSR | S_IRGRP | S_IROTH
def relpath(path, start="."):
"""Version of relpath to workaround python bug:
http://bugs.python.org/issue5117
"""
if path and start and start == "/" and path[0] == "/":
return path.lstrip("/")
return os.path.relpath(path, start=start)
def recursive_chown_dir(d, uid, gid):
"""Change the ownership of all files under directory d to uid:gid."""
for dirpath, dirnames, filenames in os.walk(d):
for name in dirnames:
path = os.path.join(dirpath, name)
portable.chown(path, uid, gid)
for name in filenames:
path = os.path.join(dirpath, name)
portable.chown(path, uid, gid)
def opts_parse(op, args, opts_table, opts_mapping, usage_cb=None,
use_cli_opts=True, **opts_kv):
"""Generic table-based options parsing function. Returns a tuple
consisting of a list of parsed options in the form (option, argument)
and the remaining unparsed options. The parsed-option list may contain
duplicates if an option is passed multiple times.
'op' is the operation being performed.
'args' is the arguments that should be parsed.
'opts_table' is a list of options the operation supports.
The format of the list entries should be a tuple containing the
option and its default value:
(option, default_value, [valid values], [json schema])
It is valid to have other entries in the list when they are required
for additional option processing elsewhere. These are ignored here. If
the list entry is a tuple, it must conform to the format outlined above.
The default value not only represents the default value assigned to the
option, but it also implicitly determines how the option is parsed. If
the default value is True or False, the option doesn't take any
arguments, can only be specified once, and if specified it inverts the
default value. If the default value is 0, the option doesn't take any
arguments, can be specified multiple times, and if specified its value
will be the number of times it was seen. If the default value is
None, the option requires an argument, can only be specified once, and
if specified its value will be its argument string. If the default
value is an empty list, the option requires an argument, may be
specified multiple times, and if specified its value will be a list
with all the specified argument values.
'opts_mapping' is a dict containing a mapping between the option name
and the short and long CLI specifier for that option in the form
{ option : (short, long), ... }
An example of a short opt is "f", which maps to a "-f" option. An
example of a long opt is "foo", which maps to a "--foo" option. The
option name is used as the key for that option in the parsed option
dictionary.
'usage_cb' is a function pointer that should display usage information
and will be invoked if invalid arguments are detected.
'use_cli_opts' indicates whether the options are CLI options or
key-value pair options.
'opts_kv' is a dictionary of user-provided options that should be parsed,
with the option name as key and the option argument as value.
"""
if use_cli_opts:
# list for getopt long options
opts_l_list = []
# getopt str for short options
opts_s_str = ""
# dict to map options returned by getopt to keys
opts_keys = dict()
else:
opts_name_mapping = {}
for entry in opts_table:
# The options table may also contain functions used for verification;
# they are ignored here.
if type(entry) != tuple:
continue
if len(entry) == 2:
opt, default = entry
elif len(entry) == 3:
opt, default, dummy_valid_args = entry
elif len(entry) == 4:
opt, default, dummy_valid_args, dummy_schema = entry
if use_cli_opts:
assert opt in opts_mapping
sopt, lopt = opts_mapping[opt]
# make sure an option was specified
assert sopt or lopt
if lopt != "":
if default is None or type(default) == list:
opts_l_list.append("{0}=".format(lopt))
else:
opts_l_list.append("{0}".format(lopt))
opts_keys["--{0}".format(lopt)] = opt
if sopt != "":
if default is None or type(default) == list:
opts_s_str += "{0}:".format(sopt)
else:
opts_s_str += "{0}".format(sopt)
opts_keys["-{0}".format(sopt)] = opt
else:
# Add itself as a mapping for validation.
opts_name_mapping[opt] = opt
if opt in opts_mapping:
optn = opts_mapping[opt]
if optn:
opts_name_mapping[optn] = opt
# Parse options.
if use_cli_opts:
try:
opts, pargs = getopt.getopt(args, opts_s_str,
opts_l_list)
except getopt.GetoptError as e:
usage_cb(_("illegal option -- {0}").format(e.opt),
cmd=op)
else:
opts = opts_kv
def get_default(option):
"""Find the default value for a given option from opts_table."""
for x in opts_table:
if type(x) != tuple:
continue
if len(x) == 2:
opt, default = x
elif len(x) == 3:
opt, default, dummy_valid_args = x
elif len(x) == 4:
opt, default, dummy_valid_args, \
dummy_schema = x
if option == opt:
return default
def process_opts(opt, arg, opt_dict):
"""Process option values."""
# Determine required option type based on the default value.
default = get_default(opt)
if use_cli_opts:
# Handle duplicates for integer and list types.
if type(default) == int:
if opt in opt_dict:
opt_dict[opt] += 1
else:
opt_dict[opt] = 1
return
if type(default) == list:
if opt in opt_dict:
opt_dict[opt].append(arg)
else:
opt_dict[opt] = [arg]
return
# Boolean and string types can't be repeated.
if opt in opt_dict:
raise api_errors.InvalidOptionError(
api_errors.InvalidOptionError.OPT_REPEAT, [opt])
# For boolean options we have to toggle the default value
# when in CLI mode.
if type(default) == bool:
if use_cli_opts:
opt_dict[opt] = not default
else:
opt_dict[opt] = arg
else:
opt_dict[opt] = arg
# Assemble the options dictionary by passing in the right data types
# and take care of duplicates.
opt_dict = {}
if use_cli_opts:
for x in opts:
cli_opt, arg = x
opt = opts_keys[cli_opt]
process_opts(opt, arg, opt_dict)
return opt_dict, pargs
for k, v in opts.items():
cli_opt, arg = k, v
if cli_opt in opts_name_mapping:
cli_opt = opts_name_mapping[cli_opt]
else:
raise api_errors.InvalidOptionError(
api_errors.InvalidOptionError.GENERIC,
[cli_opt])
process_opts(cli_opt, arg, opt_dict)
return opt_dict
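# Illustrative usage sketch for opts_parse(); the operation, options, and
# arguments below are hypothetical.
#   opts_table = [
#       ("verbose", 0),          # flag that may repeat; value is a count
#       ("output_file", None),   # takes a single argument
#       ("origins", []),         # takes an argument, may repeat
#   ]
#   opts_mapping = {
#       "verbose": ("v", ""),
#       "output_file": ("o", "output"),
#       "origins": ("g", ""),
#   }
#   opt_dict, pargs = opts_parse("example-op",
#       ["-v", "-v", "-o", "out.txt", "-g", "http://a", "-g", "http://b", "pkg1"],
#       opts_table, opts_mapping)
#   # opt_dict -> {"verbose": 2, "output_file": "out.txt",
#   #              "origins": ["http://a", "http://b"]}
#   # pargs    -> ["pkg1"]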
def api_cmdpath():
"""Returns the path to the executable that is invoking the api client
interfaces."""
cmdpath = None
if global_settings.client_args[0]:
cmdpath = os.path.realpath(os.path.join(sys.path[0],
os.path.basename(global_settings.client_args[0])))
if "PKG_CMDPATH" in os.environ:
cmdpath = os.environ["PKG_CMDPATH"]
# DebugValues is a singleton, hence no 'self' arg; pylint: disable=E1120
if DebugValues.get_value("simulate_cmdpath"):
cmdpath = DebugValues.get_value("simulate_cmdpath")
return cmdpath
def api_pkgcmd():
"""When running a pkg(1) command from within a packaging module, try
to use the same pkg(1) path as our current invocation. If we're
running pkg(1) from some other command (like the gui updater) then
assume that pkg(1) is in the default path."""
pkg_bin = "pkg"
cmdpath = api_cmdpath()
if cmdpath and os.path.basename(cmdpath) == "pkg":
try:
# check if the currently running pkg command
# exists and is accessible.
os.stat(cmdpath)
pkg_bin = cmdpath
except OSError:
pass
pkg_cmd = [sys.executable] + [pkg_bin]
# propagate debug options
for k, v in six.iteritems(DebugValues):
pkg_cmd.append("-D")
pkg_cmd.append("{0}={1}".format(k, v))
return pkg_cmd
def liveroot():
"""Return path to the current live root image, i.e. the image
that we are running from."""
# DebugValues is a singleton, hence no 'self' arg; pylint: disable=E1120
live_root = DebugValues.get_value("simulate_live_root")
if not live_root and "PKG_LIVE_ROOT" in os.environ:
live_root = os.environ["PKG_LIVE_ROOT"]
if not live_root:
live_root = "/"
return live_root
def spaceavail(path):
"""Find out how much space is available at the specified path if
it exists; return -1 if path doesn't exist"""
try:
res = os.statvfs(path)
return res.f_frsize * res.f_bavail
except OSError:
return -1
def get_dir_size(path):
"""Return the size (in bytes) of a directory and all of its contents."""
try:
return sum(
os.path.getsize(os.path.join(d, fname))
for d, dnames, fnames in os.walk(path)
for fname in fnames
)
except EnvironmentError as e:
# Access to protected member; pylint: disable=W0212
raise api_errors._convert_error(e)
def get_listing(desired_field_order, field_data, field_values, out_format,
def_fmt, omit_headers, escape_output=True):
"""Returns a string containing a listing defined by provided values
in the specified output format.
'desired_field_order' is the list of the fields to show in the order
they should be output left to right.
'field_data' is a dictionary of lists of the form:
{
field_name1: {
[(output formats), field header, initial field value]
},
field_nameN: {
[(output formats), field header, initial field value]
}
}
'field_values' is a generator or list of dictionaries of the form:
{
field_name1: field_value,
field_nameN: field_value
}
'out_format' is the format to use for output. Currently 'default',
'tsv', 'json', and 'json-formatted' are supported. The first is
intended for columnar, human-readable output, and the others for
parsable output.
'def_fmt' is the default Python formatting string to use for the
'default' human-readable output. It must match the fields defined
in 'field_data'.
'omit_headers' is a boolean specifying whether headers should be
included in the listing. (If applicable to the specified output
format.)
'escape_output' is an optional boolean indicating whether shell
metacharacters or embedded control sequences should be escaped
before display. (If applicable to the specified output format.)
"""
# Missing docstring; pylint: disable=C0111
# Custom key function for preserving field ordering
def key_fields(item):
return desired_field_order.index(get_header(item))
# Functions for manipulating field_data records
def filter_default(record):
return "default" in record[0]
def filter_tsv(record):
return "tsv" in record[0]
def get_header(record):
return record[1]
def get_value(record):
return record[2]
def quote_value(val):
if out_format == "tsv":
# Expand tabs if tsv output requested.
val = val.replace("\t", " " * 8)
nval = val
# Escape bourne shell metacharacters.
for c in ("\\", " ", "\t", "\n", "'", "`", ";", "&", "(", ")",
"|", "^", "<", ">"):
nval = nval.replace(c, "\\" + c)
return nval
def set_value(entry):
val = entry[1]
multi_value = False
if isinstance(val, (list, tuple, set, frozenset)):
multi_value = True
elif val == "":
entry[0][2] = '""'
return
elif val is None:
entry[0][2] = ''
return
else:
val = [val]
nval = []
for v in val:
if v == "":
# Indicate empty string value using "".
nval.append('""')
elif v is None:
# Indicate no value using empty string.
nval.append('')
elif escape_output:
# Otherwise, escape the value to be displayed.
nval.append(quote_value(str(v)))
else:
# Caller requested value not be escaped.
nval.append(str(v))
val = " ".join(nval)
nval = None
if multi_value:
val = "({0})".format(val)
entry[0][2] = val
if out_format == "default":
# Create a formatting string for the default output
# format.
fmt = def_fmt
filter_func = filter_default
elif out_format == "tsv":
# Create a formatting string for the tsv output
# format.
num_fields = sum(
1 for k in field_data
if filter_tsv(field_data[k])
)
fmt = "\t".join('{{{0}}}'.format(x) for x in range(num_fields))
filter_func = filter_tsv
elif out_format == "json" or out_format == "json-formatted":
args = { "sort_keys": True }
if out_format == "json-formatted":
args["indent"] = 2
# 'json' formats always include any extra fields returned;
# any explicitly named fields are only included if 'json'
# is explicitly listed.
def fmt_val(v):
if isinstance(v, six.string_types):
return v
if isinstance(v, (list, tuple, set, frozenset)):
return [fmt_val(e) for e in v]
if isinstance(v, dict):
for k, e in six.iteritems(v):
v[k] = fmt_val(e)
return v
return str(v)
output = json.dumps([
dict(
(k, fmt_val(entry[k]))
for k in entry
if k not in field_data or "json" in field_data[k][0]
)
for entry in field_values
], **args)
if out_format == "json-formatted":
# Include a trailing newline for readability.
return output + "\n"
return output
# Extract the list of headers from the field_data dictionary. Ensure
# they are extracted in the desired order by using the custom sort
# function.
hdrs = map(get_header, sorted(filter(filter_func,
field_data.values()), key=key_fields))
# Output a header if desired.
output = ""
if not omit_headers:
output += fmt.format(*hdrs)
output += "\n"
for entry in field_values:
# In Python 3, map() returns an iterator and will not process
# elements unless being called, so we turn it into a list to
# force it to process elements.
list(map(set_value, (
(field_data[f], v)
for f, v in six.iteritems(entry)
if f in field_data
)))
values = map(get_value, sorted(filter(filter_func,
field_data.values()), key=key_fields))
output += fmt.format(*values)
output += "\n"
return output
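# Illustrative usage sketch for get_listing(); the field names and values
# below are hypothetical (callers normally wrap the headers with _()).
#   field_data = {
#       "name":    [("default", "tsv", "json"), "NAME", ""],
#       "version": [("default", "tsv", "json"), "VERSION", ""],
#   }
#   field_values = [{"name": "pkg/example", "version": "1.0"}]
#   text = get_listing(["NAME", "VERSION"], field_data, field_values,
#       "tsv", "{0:30} {1}", omit_headers=False)
#   # text -> "NAME\tVERSION\npkg/example\t1.0\n"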
def truncate_file(f, size=0):
"""Truncate the specified file."""
try:
f.truncate(size)
except IOError:
pass
except OSError as e:
# Access to protected member; pylint: disable=W0212
raise api_errors._convert_error(e)
def flush_output():
"""flush stdout and stderr"""
try:
sys.stdout.flush()
except IOError:
pass
except OSError as e:
# Access to protected member; pylint: disable=W0212
raise api_errors._convert_error(e)
try:
sys.stderr.flush()
except IOError:
pass
except OSError as e:
# Access to protected member; pylint: disable=W0212
raise api_errors._convert_error(e)
# valid json types
json_types_immediates = (bool, float, six.integer_types, six.string_types,
type(None))
json_types_collections = (dict, list)
json_types = tuple(json_types_immediates + json_types_collections)
json_debug = False
def json_encode(name, data, desc, commonize=None, je_state=None):
"""A generic json encoder.
'name' a descriptive name of the data we're encoding. If encoding a
class, this would normally be the class name. 'name' is used when
displaying errors to identify the data that caused the errors.
'data' data to encode.
'desc' a description of the data to encode.
'commonize' a list of objects that should be cached by reference.
this is used when encoding objects which may contain multiple
references to a single object. In this case, each reference will be
replaced with a unique id, and the object that was pointed to will
only be encoded once. This ensures that upon decoding we can restore
the original object and all references to it."""
# debugging
if je_state is None and json_debug:
print("json_encode name: ", name, file=sys.stderr)
print("json_encode data: ", data, file=sys.stderr)
# we don't encode None
if data is None:
return None
# initialize parameters to default
if commonize is None:
commonize = frozenset()
if je_state is None:
# this is the first invocation of this function, so "data"
# points to the top-level object that we want to encode. this
# means that if we're commonizing any objects we should
# finalize the object cache when we're done encoding this
# object.
finish = True
# initialize recursion state
obj_id = [0]
obj_cache = {}
je_state = [obj_id, obj_cache, commonize]
else:
# we're being invoked recursively, do not finalize the object
# cache (since that will be done by a previous invocation of
# this function).
finish = False
# get recursion state
obj_id, obj_cache, commonize_old = je_state
# check if we're changing the set of objects to commonize
if not commonize:
commonize = commonize_old
else:
# update the set of objects to commonize
# make a copy so we don't update our callers state
commonize = frozenset(commonize_old | commonize)
je_state = [obj_id, obj_cache, commonize]
# verify state
assert type(name) == str
assert type(obj_cache) == dict
assert type(obj_id) == list and len(obj_id) == 1 and obj_id[0] >= 0
assert type(commonize) == frozenset
assert type(je_state) == list and len(je_state) == 3
def je_return(name, data, finish, je_state):
"""if necessary, finalize the object cache and merge it into
the state data.
while encoding, the object cache is a dictionary which
contains tuples consisting of an assigned unique object id
(obj_id) and an encoded object. these tuples are hashed by
the python object id of the original un-encoded python object.
so the hash contains:
{ id(<obj>): ( <obj_id>, <obj_state> ) }
when we finish the object cache we update it so that it
contains just encoded objects hashed by their assigned object
id (obj_id). so the hash contains:
{ str(<obj_id>): <obj_state> }
then we merge the state data and object cache into a single
dictionary and return that.
"""
# Unused argument; pylint: disable=W0613
if not finish:
return data
# json.dump converts integer dictionary keys into strings, so
# we'll convert the object id keys (which are integers) into
# strings (that way we're encoder/decoder independent).
obj_cache = je_state[1]
obj_cache2 = {}
for obj_id, obj_state in six.itervalues(obj_cache):
obj_cache2[str(obj_id)] = obj_state
data = { "json_state": data, "json_objects": obj_cache2 }
# Value 'DebugValues' is unsubscriptable;
# pylint: disable=E1136
if DebugValues["plandesc_validate"]:
json_validate(name, data)
# debugging
if json_debug:
print("json_encode finished name: ", name,
file=sys.stderr)
print("json_encode finished data: ", data,
file=sys.stderr)
return data
# check if the description is a type object
if isinstance(desc, type):
desc_type = desc
else:
# get the expected data type from the description
desc_type = type(desc)
# get the data type
data_type = getattr(data, "__metaclass__", type(data))
# sanity check that the data type matches the description
assert issubclass(data_type, desc_type), \
"unexpected {0} for {1}, expected: {2}, value: {3}".format(
data_type, name, desc_type, data)
# The following situation is only true for Python 2.
# We should not see unicode strings getting passed in. The assert is
# necessary since we use the PkgDecoder hook function during json_decode
# to convert unicode objects back into escaped str objects, which would
# otherwise do that conversion unintentionally.
if six.PY2:
assert not isinstance(data_type, six.text_type), \
"unexpected unicode string: {0}".format(data)
# we don't need to do anything for basic types
for t in json_types_immediates:
if issubclass(desc_type, t):
return je_return(name, data, finish, je_state)
# encode elements nested in a dictionary like object
# return elements in a dictionary
if desc_type in (dict, collections.defaultdict):
# we always return a new dictionary
rv = {}
# check if we're not encoding nested elements
if len(desc) == 0:
rv.update(data)
return je_return(name, rv, finish, je_state)
# lookup the first descriptor to see if we have
# generic type description.
desc_k, desc_v = list(desc.items())[0]
# if the key in the first type pair is a type then we
# have a generic type description that applies to all
# keys and values in the dictionary.
# check if the description is a type object
if isinstance(desc_k, type):
# there can only be one generic type desc
assert len(desc) == 1
# encode all key / value pairs
for k, v in six.iteritems(data):
# encode the key
name2 = "{0}[{1}].key()".format(name, desc_k)
k2 = json_encode(name2, k, desc_k,
je_state=je_state)
# encode the value
name2 = "{0}[{1}].value()".format(name, desc_k)
v2 = json_encode(name2, v, desc_v,
je_state=je_state)
# save the result
rv[k2] = v2
return je_return(name, rv, finish, je_state)
# we have element specific value type descriptions.
# encode the specific values.
rv.update(data)
for desc_k, desc_v in six.iteritems(desc):
# check for the specific key
if desc_k not in rv:
continue
# encode the value
name2 = "{0}[{1}].value()".format(name, desc_k)
rv[desc_k] = json_encode(name2, rv[desc_k], desc_v,
je_state=je_state)
return je_return(name, rv, finish, je_state)
# encode elements nested in a list like object
# return elements in a list
if desc_type in (tuple, list, set, frozenset):
# we always return a new list
rv = []
# check for an empty list since we use izip_longest
# (zip_longest in Python 3)
if len(data) == 0:
return je_return(name, rv, finish, je_state)
# check if we're not encoding nested elements
if len(desc) == 0:
rv.extend(data)
return je_return(name, rv, finish, je_state)
# don't accidentally generate data via izip_longest
# (zip_longest in Python 3)
assert len(data) >= len(desc), \
"{0:d} >= {1:d}".format(len(data), len(desc))
i = 0
for data2, desc2 in zip_longest(data, desc,
fillvalue=list(desc)[0]):
name2 = "{0}[{1:d}]".format(name, i)
i += 1
rv.append(json_encode(name2, data2, desc2,
je_state=je_state))
return je_return(name, rv, finish, je_state)
# if we're commonizing this object and it's already been encoded then
# just return its encoded object id.
if desc_type in commonize and id(data) in obj_cache:
rv = obj_cache[id(data)][0]
return je_return(name, rv, finish, je_state)
# find an encoder for this class, which should be:
# <class>.getstate(obj, je_state)
encoder = getattr(desc_type, "getstate", None)
assert encoder is not None, "no json encoder for: {0}".format(desc_type)
# encode the data
rv = encoder(data, je_state)
assert rv is not None, "json encoder returned none for: {0}".format(
desc_type)
# if we're commonizing this object, then assign it an object id and
# save that object id and the encoded object into the object cache
# (which is indexed by the python id for the object).
if desc_type in commonize:
obj_cache[id(data)] = (obj_id[0], rv)
rv = obj_id[0]
obj_id[0] += 1
# return the encoded element
return je_return(name, rv, finish, je_state)
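# Illustrative sketch of the commonized wire format (the class name Foo and
# its state are hypothetical): if Foo defines getstate()/fromstate() and the
# same Foo instance appears twice in the encoded data, e.g.
#   json_encode("foos", [foo, foo], [Foo], commonize=frozenset([Foo]))
# then the encoded result is wrapped as
#   { "json_state": [0, 0], "json_objects": { "0": <encoded foo state> } }
# where the shared object is stored once under its assigned object id and
# the references inside "json_state" are just that integer id.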
def json_decode(name, data, desc, commonize=None, jd_state=None):
"""A generic json decoder.
'name' a descriptive name of the data. (used to identify unexpected
data errors.)
'desc' a programmatic description of data types.
'data' data to decode."""
# debugging
if jd_state is None and json_debug:
print("json_decode name: ", name, file=sys.stderr)
print("json_decode data: ", data, file=sys.stderr)
# we don't decode None
if data is None:
return data
# initialize parameters to default
if commonize is None:
commonize = frozenset()
if jd_state is None:
# this is the first invocation of this function, so when we
# return we're done decoding data.
finish = True
# first time here, initialize recursion state
if not commonize:
# no common state
obj_cache = {}
else:
# load commonized state
obj_cache = data["json_objects"]
data = data["json_state"]
jd_state = [obj_cache, commonize]
else:
# we're being invoked recursively.
finish = False
obj_cache, commonize_old = jd_state
# check if this is the first object to use commonization
if not commonize_old and commonize:
obj_cache = data["json_objects"]
data = data["json_state"]
# merge in any new commonize requests
je_state_changed = False
# check if we're updating the set of objects to commonize
if not commonize:
commonize = commonize_old
else:
# update the set of objects to commonize
# make a copy so we don't update our callers state.
commonize = frozenset(commonize_old | commonize)
je_state_changed = True
if je_state_changed:
jd_state = [obj_cache, commonize]
# verify state
assert type(name) == str, "type(name) == {0}".format(type(name))
assert type(obj_cache) == dict
assert type(commonize) == frozenset
assert type(jd_state) == list and len(jd_state) == 2
def jd_return(name, data, desc, finish, jd_state):
"""Check if we're done decoding data."""
# Unused argument; pylint: disable=W0613
# check if the description is a type object
if isinstance(desc, type):
desc_type = desc
else:
# get the expected data type from the description
desc_type = type(desc)
# get the data type
data_type = getattr(data, "__metaclass__", type(data))
# sanity check that the data type matches the description
assert issubclass(data_type, desc_type), \
"unexpected {0} for {1}, expected: {2}, value: {3}".format(
data_type, name, desc_type, data)
if not finish:
return data
# debugging
if json_debug:
print("json_decode finished name: ", name,
file=sys.stderr)
print("json_decode finished data: ", data,
file=sys.stderr)
return data
# check if the description is a type object
if isinstance(desc, type):
desc_type = desc
else:
# get the expected data type from the description
desc_type = type(desc)
# we don't need to do anything for basic types
for t in json_types_immediates:
if issubclass(desc_type, t):
return jd_return(name, data, desc, finish, jd_state)
# decode elements nested in a dictionary
# return elements in the specified dictionary like object
if isinstance(desc, dict):
# allocate the return object. we don't just use
# type(desc) because that won't work for things like
# collections.defaultdict types.
rv = desc.copy()
rv.clear()
# check if we're not decoding nested elements
if len(desc) == 0:
rv.update(data)
return jd_return(name, rv, desc, finish, jd_state)
# lookup the first descriptor to see if we have
# generic type description.
desc_k, desc_v = list(desc.items())[0]
# if the key in the descriptor is a type then we have
# a generic type description that applies to all keys
# and values in the dictionary.
# check if the description is a type object
if isinstance(desc_k, type):
# there can only be one generic type desc
assert len(desc) == 1
# decode all key / value pairs
for k, v in six.iteritems(data):
# decode the key
name2 = "{0}[{1}].key()".format(name, desc_k)
k2 = json_decode(name2, k, desc_k,
jd_state=jd_state)
# decode the value
name2 = "{0}[{1}].value()".format(name, desc_k)
v2 = json_decode(name2, v, desc_v,
jd_state=jd_state)
# save the result
rv[k2] = v2
return jd_return(name, rv, desc, finish, jd_state)
# we have element specific value type descriptions.
# copy all data and then decode the specific values
rv.update(data)
for desc_k, desc_v in six.iteritems(desc):
# check for the specific key
if desc_k not in rv:
continue
# decode the value
name2 = "{0}[{1}].value()".format(name, desc_k)
rv[desc_k] = json_decode(name2, rv[desc_k],
desc_v, jd_state=jd_state)
return jd_return(name, rv, desc, finish, jd_state)
# decode elements nested in a list
# return elements in the specified list like object
if isinstance(desc, (tuple, list, set, frozenset)):
# get the return type
rvtype = type(desc)
# check for an empty list since we use izip_longest
# (zip_longest in Python 3)
if len(data) == 0:
rv = rvtype([])
return jd_return(name, rv, desc, finish, jd_state)
# check if we're not decoding nested elements
if len(desc) == 0:
rv = rvtype(data)
return jd_return(name, rv, desc, finish, jd_state)
# don't accidentally generate data via izip_longest
# (zip_longest in Python 3)
assert len(data) >= len(desc), \
"{0:d} >= {1:d}".format(len(data), len(desc))
rv = []
i = 0
for data2, desc2 in zip_longest(data, desc,
fillvalue=list(desc)[0]):
name2 = "{0}[{1:d}]".format(name, i)
i += 1
rv.append(json_decode(name2, data2, desc2,
jd_state=jd_state))
rv = rvtype(rv)
return jd_return(name, rv, desc, finish, jd_state)
# find a decoder for this data, which should be:
# <class>.fromstate(state, jd_state)
decoder = getattr(desc_type, "fromstate", None)
assert decoder is not None, "no json decoder for: {0}".format(desc_type)
# if this object was commonized then get a reference to it from the
# object cache.
if desc_type in commonize:
assert type(data) == int
# json.dump converts integer dictionary keys into strings, so
# obj_cache was indexed by integer strings.
data = str(data)
rv = obj_cache[data]
# get the data type
data_type = getattr(rv, "__metaclass__", type(rv))
if data_type != desc_type:
# this commonized object hasn't been decoded yet
# decode it and update the cache with the decoded obj
rv = decoder(rv, jd_state)
obj_cache[data] = rv
else:
# decode the data
rv = decoder(data, jd_state)
return jd_return(name, rv, desc, finish, jd_state)
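# Illustrative round-trip sketch (the Point class below is hypothetical and
# only shows the getstate()/fromstate() hooks this encoder/decoder expects):
#   class Point(object):
#       def __init__(self, x, y):
#           self.x, self.y = x, y
#       @staticmethod
#       def getstate(obj, je_state=None):
#           return {"x": obj.x, "y": obj.y}
#       @staticmethod
#       def fromstate(state, jd_state=None):
#           return Point(state["x"], state["y"])
#   state = json_encode("point", Point(1, 2), Point,
#       commonize=frozenset([Point]))
#   point = json_decode("point", state, Point,
#       commonize=frozenset([Point]))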
def json_validate(name, data):
"""Validate that a named piece of data can be represented in json and
that the data can be passed directly to json.dump(). If the data
can't be represented as json we'll trigger an assert.
'name' is the name of the data to validate
'data' is the data to validate; nested lists and dictionaries are
checked recursively."""
assert isinstance(data, json_types), \
"invalid json type \"{0}\" for \"{1}\", value: {2}".format(
type(data), name, str(data))
if type(data) == dict:
for k in data:
# json.dump converts integer dictionary keys into
# strings, which is a bit unexpected. so make sure we
# don't have any of those.
assert type(k) != int, \
"integer dictionary keys detected for: {0}".format(
name)
# validate the key and the value
new_name = "{0}[{1}].key()".format(name, k)
json_validate(new_name, k)
new_name = "{0}[{1}].value()".format(name, k)
json_validate(new_name, data[k])
if type(data) == list:
for i in range(len(data)):
new_name = "{0}[{1:d}]".format(name, i)
json_validate(new_name, data[i])
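# Illustrative usage sketch: json_validate() asserts on anything that
# json.dump() could not represent faithfully, for example:
#   json_validate("plan", {"name": "pkg", "count": 3})  # passes
#   json_validate("plan", {1: "bad"})  # asserts: integer dictionary key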
def json_diff(name, d0, d1, alld0, alld1):
"""Compare two json encoded objects to make sure they are
identical, assert() if they are not."""
def dbg():
"""dump debug info for json_diff"""
def d(s):
"""dbg helper"""
return json.dumps(s, sort_keys=True, indent=4)
return "\n--- d0\n" + d(d0) + "\n+++ d1\n" + d(d1) + \
"\n--- alld0\n" + d(alld0) + "\n+++ alld1\n" + d(alld1)
assert type(d0) == type(d1), ("Json data types differ for \"{0}\":\n"
"type 1: {1}\ntype 2: {2}\n").format(name, type(d0),
type(d1)) + dbg()
if type(d0) == dict:
assert set(d0) == set(d1), (
"Json dictionary keys differ for \"{0}\":\n"
"dict 1 missing: {1}\n"
"dict 2 missing: {2}\n").format(name,
set(d1) - set(d0), set(d0) - set(d1)) + dbg()
for k in d0:
new_name = "{0}[{1}]".format(name, k)
json_diff(new_name, d0[k], d1[k], alld0, alld1)
if type(d0) == list:
assert len(d0) == len(d1), (
"Json list lengths differ for \"{0}\":\n"
"list 1 length: {1}\n"
"list 2 length: {2}\n").format(name,
len(d0), len(d1)) + dbg()
for i in range(len(d0)):
new_name = "{0}[{1:d}]".format(name, i)
json_diff(new_name, d0[i], d1[i], alld0, alld1)
def json_hook(dct):
"""Hook routine used by the JSON module to ensure that unicode objects
are converted to bytes objects in Python 2 and ensures that bytes
objects are converted to str objects in Python 3."""
rvdct = {}
for k, v in six.iteritems(dct):
if isinstance(k, six.string_types):
k = force_str(k)
if isinstance(v, six.string_types):
v = force_str(v)
rvdct[k] = v
return rvdct
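# Illustrative usage sketch: json_hook is intended to be passed as the
# object_hook when loading json so that string keys and values come back as
# the native str type of the running Python, e.g.:
#   data = json.loads('{"name": "pkg"}', object_hook=json_hook)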
class Timer(object):
"""A class which can be used for measuring process times (user,
system, and wait)."""
__precision = 3
__log_fmt = "utime: {0:>7.3f}; stime: {1:>7.3f}; wtime: {2:>7.3f}"
__log_fmt_shift = "utime: {1:>7.3f}; stime: {2:>7.3f}; wtime: {3:>7.3f}"
def __init__(self, module):
self.__module = module
self.__timings = []
# we initialize our time values to account for all time used
# since the start of the process. (user and system time are
# obtained relative to process start time, but wall time is an
absolute time value so here we initialize our initial wall
# time value to the time our process was started.)
self.__utime = self.__stime = 0
self.__wtime = _prstart()
def __zero1(self, delta):
"""Return True if a number is zero (up to a certain level of
precision.)"""
return int(delta * (10 ** self.__precision)) == 0
def __zero(self, udelta, sdelta, wdelta):
"""Return True if all the passed in values are zero."""
return self.__zero1(udelta) and \
self.__zero1(sdelta) and \
self.__zero1(wdelta)
def __str__(self):
s = "\nTimings for {0}: [\n".format(self.__module)
utotal = stotal = wtotal = 0
phases = [i[0] for i in self.__timings] + ["total"]
phase_width = max([len(i) for i in phases]) + 1
fmt = " {{0:{0}}} {1};\n".format(phase_width,
Timer.__log_fmt_shift)
for phase, udelta, sdelta, wdelta in self.__timings:
if self.__zero(udelta, sdelta, wdelta):
continue
utotal += udelta
stotal += sdelta
wtotal += wdelta
s += fmt.format(phase + ":", udelta, sdelta, wdelta)
s += fmt.format("total:", utotal, stotal, wtotal)
s += "]\n"
return s
def reset(self):
"""Update saved times to current process values."""
self.__utime, self.__stime, self.__wtime = self.__get_time()
@staticmethod
def __get_time():
"""Get current user, system, and wait times for this
process."""
rusage = resource.getrusage(resource.RUSAGE_SELF)
utime = rusage[0]
stime = rusage[1]
wtime = time.time()
return (utime, stime, wtime)
def record(self, phase, logger=None):
"""Record the difference between the previously saved process
time values and the current values. Then update the saved
values to match the current values"""
utime, stime, wtime = self.__get_time()
udelta = utime - self.__utime
sdelta = stime - self.__stime
wdelta = wtime - self.__wtime
self.__timings.append((phase, udelta, sdelta, wdelta))
self.__utime, self.__stime, self.__wtime = utime, stime, wtime
rv = "{0}: {1}: ".format(self.__module, phase)
rv += Timer.__log_fmt.format(udelta, sdelta, wdelta)
if logger:
logger.debug(rv)
return rv
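# Illustrative usage sketch for Timer (the phase name and workload are
# hypothetical):
#   timer = Timer("client image")
#   timer.reset()               # start measuring from "now"
#   do_planning_work()          # hypothetical workload
#   timer.record("planning")    # record this phase and return a summary line
#   print(timer)                # dump all recorded phases plus totals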
class AsyncCallException(Exception):
"""Exception class for AsyncCall() errors.
Any exceptions caught by the async call thread get bundled into this
Exception because otherwise we'll lose the stack trace associated with
the original exception."""
def __init__(self, e=None):
Exception.__init__(self)
self.e = e
self.tb = None
def __str__(self):
if self.tb:
return str(self.tb) + str(self.e)
return str(self.e)
class AsyncCall(object):
"""Class which can be used to call a function asynchronously.
The call is performed via a dedicated thread."""
def __init__(self):
self.rv = None
self.e = None
# keep track of what's been done
self.started = False
# internal state
self.__thread = None
# pre-allocate an exception that we'll use in case everything
# goes horribly wrong.
self.__e = AsyncCallException(
Exception("AsyncCall Internal Error"))
def __thread_cb(self, dummy, cb, *args, **kwargs):
"""Dedicated call thread.
'dummy' is a dummy parameter that is not used. this is done
because the threading module (which invokes this function)
inspects the first argument of "args" to check if it's
iterable, and that may cause bizarre failures if cb is a
dynamically bound class (like xmlrpclib._Method).
We need to be careful here and catch all exceptions. Since
we're executing in our own thread, any exceptions we don't
catch get dumped to the console."""
# Catch "Exception"; pylint: disable=W0703
try:
# Value 'DebugValues' is unsubscriptable;
# pylint: disable=E1136
if DebugValues["async_thread_error"]:
raise Exception("async_thread_error")
rv = e = None
try:
rv = cb(*args, **kwargs)
except Exception as e:
self.e = self.__e
self.e.e = e
self.e.tb = traceback.format_exc()
return
self.rv = rv
except Exception as e:
# if we raise an exception here, we're hosed
self.rv = None
self.e = self.__e
self.e.e = e
try:
# Value 'DebugValues' is unsubscriptable;
# pylint: disable=E1136
if DebugValues["async_thread_error"]:
raise Exception("async_thread_error")
self.e.tb = traceback.format_exc()
except Exception:
pass
def start(self, cb, *args, **kwargs):
"""Start a call to an rpc server."""
assert not self.started
self.started = True
# prepare the arguments for the thread
if args:
args = (0, cb) + args
else:
args = (0, cb)
# initialize and return the thread
self.__thread = threading.Thread(target=self.__thread_cb,
args=args, kwargs=kwargs)
self.__thread.daemon = True
self.__thread.start()
def join(self):
"""Wait for an rpc call to finish."""
assert self.started
self.__thread.join()
def is_done(self):
"""Check if an rpc call is done."""
assert self.started
return not self.__thread.is_alive()
def result(self):
"""Finish a call to an rpc server."""
assert self.started
# wait for the async call thread to exit
self.join()
assert self.is_done()
if self.e:
# if the calling thread hit an exception, re-raise it
# Raising NoneType; pylint: disable=E0702
raise self.e
return self.rv
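# Illustrative usage sketch for AsyncCall (the callback is hypothetical):
#   ac = AsyncCall()
#   ac.start(slow_function, arg1, kwarg=2)
#   ...                         # do other work while the call runs
#   rv = ac.result()            # joins the thread and re-raises any
#                               # AsyncCallException from the callback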
def get_runtime_proxy(proxy, uri):
"""Given a proxy string and a URI we want to access using it, determine
whether any OS environment variables should override that value.
The special value "-" is returned when a no_proxy environment variable
was found which should apply to this URI, indicating that no proxy
should be used at runtime."""
runtime_proxy = proxy
# There is no upper case version of http_proxy, according to curl(1)
environ_http_proxy = os.environ.get("http_proxy")
environ_https_proxy = os.environ.get("https_proxy")
environ_https_proxy_upper = os.environ.get("HTTPS_PROXY")
environ_all_proxy = os.environ.get("all_proxy")
environ_all_proxy_upper = os.environ.get("ALL_PROXY")
no_proxy = os.environ.get("no_proxy", [])
no_proxy_upper = os.environ.get("NO_PROXY", [])
if no_proxy:
no_proxy = no_proxy.split(",")
if no_proxy_upper:
no_proxy_upper = no_proxy_upper.split(",")
# Give precedence to protocol-specific proxies, and lowercase versions.
if uri and uri.startswith("http") and environ_http_proxy:
runtime_proxy = environ_http_proxy
elif uri and uri.startswith("https") and environ_https_proxy:
runtime_proxy = environ_https_proxy
elif uri and uri.startswith("https") and environ_https_proxy_upper:
runtime_proxy = environ_https_proxy_upper
elif environ_all_proxy:
runtime_proxy = environ_all_proxy
elif environ_all_proxy_upper:
runtime_proxy = environ_all_proxy_upper
if no_proxy or no_proxy_upper:
# SplitResult has a netloc member; pylint: disable=E1103
netloc = urlsplit(uri, allow_fragments=0).netloc
host = netloc.split(":")[0]
if host in no_proxy or no_proxy == ["*"]:
return "-"
if host in no_proxy_upper or no_proxy_upper == ["*"]:
return "-"
if not runtime_proxy:
return
return runtime_proxy
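# Illustrative sketch of get_runtime_proxy() behaviour (hostnames and proxy
# values are hypothetical): with http_proxy=http://proxy.example.com:8080 in
# the environment, get_runtime_proxy(None, "http://pkg.example.com/") returns
# the environment proxy; with no_proxy=pkg.example.com also set, it returns
# "-", meaning no proxy should be used for that URI at runtime.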
def decode(s):
"""convert non-ascii strings to unicode;
replace non-convertable chars"""
if six.PY3:
return s
try:
# this will fail if any 8 bit chars in string
# this is a nop if string is ascii.
s = s.encode("ascii")
except ValueError:
# this will encode 8 bit strings into unicode
s = s.decode("utf-8", "replace")
return s
def yield_matching(pat_prefix, items, patterns):
"""Helper function for yielding items that match one of the provided
patterns."""
if patterns:
# Normalize patterns and determine whether to glob.
npatterns = []
for p in patterns:
if pat_prefix:
pat = p if p.startswith(pat_prefix) \
else pat_prefix + p
else:
pat = p
if "*" in p or "?" in p:
pat = re.compile(fnmatch.translate(pat)).match
glob_match = True
else:
glob_match = False
npatterns.append((pat, glob_match))
patterns = npatterns
npatterns = None
for item in items:
for (pat, glob_match) in patterns:
if glob_match:
if pat(item):
break
elif item == pat:
break
else:
if patterns:
continue
# No patterns or matched at least one.
yield item
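# Illustrative usage sketch (item and pattern values are hypothetical):
#   list(yield_matching("pkg://", ["pkg://a", "pkg://b"], ["pkg://a"]))
#       -> ["pkg://a"]              # exact match after prefix normalization
#   list(yield_matching("pkg://", ["pkg://a", "pkg://b"], ["*"]))
#       -> ["pkg://a", "pkg://b"]   # glob pattern matches everything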
sigdict = defaultdict(list)
def signame(signal_number):
"""convert signal number to name(s)"""
if not sigdict:
for name in dir(signal):
if name.startswith("SIG") and "_" not in name:
sigdict[getattr(signal, name)].append(name)
return "/".join(sigdict.get(signal_number,
["Unnamed signal: {0:d}".format(signal_number)]))
def list_actions_by_attrs(actionlist, attrs, show_all=False,
remove_consec_dup_lines=False, last_res=None):
"""Produces a list of n tuples (where n is the length of attrs)
containing the relevant information about the actions.
The "actionlist" parameter is a list of tuples which contain the fmri
of the package that's the source of the action, the action, and the
publisher the action's package came from. If the actionlist was
generated by searching, the last two pieces, "match" and "match_type"
contain information about why this action was selected.
The "attrs" parameter is a list of the attributes of the action that
should be displayed.
The "show_all" parameter determines whether an action that lacks one
or more of the desired attributes will be displayed or not.
The "remove_consec_dup_lines" parameter determines whether consecutive
duplicate lines should be removed from the results.
The "last_res" parameter is a seed to compare the first result against
for duplicate removal.
"""
# Assert that if last_res is set, we should be removing duplicate
# lines.
assert remove_consec_dup_lines or not last_res
last_line = last_res
for pfmri, action, pub, match, match_type in actionlist:
line = []
for attr in attrs:
if action and attr in action.attrs:
a = action.attrs[attr]
elif attr == "action.name":
a = action.name
elif attr == "action.key":
a = action.attrs[action.key_attr]
elif attr == "action.raw":
a = action
elif attr in ("hash", "action.hash"):
a = getattr(action, "hash", "")
elif attr == "pkg.name":
a = pfmri.get_name()
elif attr == "pkg.fmri":
a = pfmri.get_fmri(include_build=False)
elif attr == "pkg.shortfmri":
a = pfmri.get_short_fmri()
elif attr == "pkg.publisher":
a = pfmri.get_publisher()
if a is None:
a = pub
if a is None:
a = ""
elif attr == "search.match":
a = match
elif attr == "search.match_type":
a = match_type
else:
a = ""
line.append(a)
# Too many boolean expressions in if statement;
# pylint: disable=R0916
if (line and [l for l in line if str(l) != ""] or show_all) \
and (not remove_consec_dup_lines or last_line is None or
last_line != line):
last_line = line
yield line
def _min_edit_distance(word1, word2):
"""Calculate the minimal edit distance for converting word1 to word2,
based on Wagner-Fischer algorithm."""
m = len(word1)
n = len(word2)
# dp[i][j] stands for the edit distance between two strings with
# length i and j, i.e., word1[0,...,i-1] and word2[0,...,j-1]
dp = [[0 for i in range(n+1)] for j in range(m+1)]
ins_cost = 1.0
del_cost = 1.0
rep_cost = 1.0
for i in range(m+1):
dp[i][0] = del_cost * i
for i in range(n+1):
dp[0][i] = ins_cost * i
for i in range(1, m+1):
for j in range(1, n+1):
if word1[i-1] == word2[j-1]:
dp[i][j] = dp[i-1][j-1]
else:
dp[i][j] = min(
dp[i-1][j-1] + rep_cost,
dp[i][j-1] + ins_cost,
dp[i-1][j] + del_cost)
return dp[m][n]
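# Illustrative worked example: converting "lst" to "list" requires a single
# insertion, so _min_edit_distance("lst", "list") == 1.0; converting
# "install" to "uninstall" requires two insertions, giving 2.0.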
def suggest_known_words(text, known_words):
"""Given a text, a list of known_words, suggest some correct
candidates from known_words."""
candidates = []
if not text:
return candidates
# We can confidently make suggestions if the text is part of a known word.
for known in known_words:
if len(text) < 4:
# If the text's length is short, treat it as a prefix.
if known.startswith(text):
candidates.append(known)
elif text in known or known in text:
# Otherwise check if the text is part of the known
# words or vice versa.
candidates.append(known)
if candidates:
if len(candidates) < 4:
return candidates
else:
# Give up suggestions if there are too many candidates.
return
# If there are no candidates from the "contains" check, use the edit
# distance algorithm to seek further.
for known in known_words:
distance = _min_edit_distance(text, known)
if distance <= len(known) / 2.0:
candidates.append((known, distance))
# Sort the candidates by their distance, and return the words only.
return [c[0] for c in sorted(candidates, key=itemgetter(1))]
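# Illustrative usage sketch (the word list is hypothetical):
#   suggest_known_words("instal", ["install", "uninstall", "info"])
#       -> ["install", "uninstall"]   # substring matches; fewer than four,
#                                     # so they are returned directly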
def smallest_diff_key(a, b):
"""Return the smallest key 'k' in 'a' such that a[k] != b[k]."""
keys = [k for k in a if a.get(k) != b.get(k)]
if not keys:
return None
return min(keys)
def dict_cmp(a, b):
"""cmp method for dictionary, translated from the source code
http://svn.python.org/projects/python/trunk/Objects/dictobject.c"""
if len(a) != len(b):
return cmp(len(a), len(b))
adiff = smallest_diff_key(a, b)
bdiff = smallest_diff_key(b, a)
if adiff is None and bdiff is None:
return 0
if adiff != bdiff:
return cmp(adiff, bdiff)
return cmp(a[adiff], b[bdiff])
def cmp(a, b):
"""Implementaion for Python 2.7's built-in function cmp(), which is
removed in Python 3."""
if isinstance(a, dict) and isinstance(b, dict):
return dict_cmp(a, b)
try:
if a == b:
return 0
elif a < b:
return -1
else:
return 1
except TypeError:
if a is None and b:
return -1
if a and b is None:
return 1
return NotImplemented
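# Illustrative usage sketch: cmp() reproduces the Python 2 semantics, e.g.
#   cmp(1, 2) -> -1, cmp("a", "a") -> 0, and for dictionaries
#   cmp({"a": 1}, {"a": 2}) -> -1 via dict_cmp().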
def set_memory_limit(bytes, allow_override=True):
"""Limit memory consumption of current process to 'bytes'."""
if allow_override:
try:
bytes = int(os.environ["PKG_CLIENT_MAX_PROCESS_SIZE"])
except (KeyError, ValueError):
pass
try:
resource.setrlimit(resource.RLIMIT_DATA, (bytes, bytes))
except AttributeError:
# If platform doesn't support RLIMIT_DATA, just ignore it.
pass
except ValueError:
# An unprivileged user can not raise a previously set limit,
# if that ever happens, just ignore it.
pass
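# Illustrative usage sketch: cap the current process data segment at roughly
# 8 GiB unless PKG_CLIENT_MAX_PROCESS_SIZE overrides it:
#   set_memory_limit(8 * 1024 * 1024 * 1024)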
def force_bytes(s, encoding="utf-8", errors="strict"):
"""Force the string into bytes."""
if isinstance(s, bytes):
return s
try:
if isinstance(s, six.string_types):
# this case is: unicode in Python 2 and str in Python 3
return s.encode(encoding, errors)
elif six.PY3:
# type not a string and Python 3's bytes() requires
# a string argument
return six.text_type(s).encode(encoding)
# type not a string
s = bytes(s)
except UnicodeEncodeError:
raise
return s
def force_text(s, encoding="utf-8", errors="strict"):
"""Force the string into text."""
if isinstance(s, six.text_type):
return s
try:
if isinstance(s, (six.string_types, bytes)):
# this case is: str(bytes) in Python 2 and bytes in
# Python 3
s = s.decode(encoding, errors)
else:
# type not a string
s = six.text_type(s)
except UnicodeDecodeError as e:
raise api_errors.PkgUnicodeDecodeError(s, *e.args)
return s
# force_str minimizes the work needed for compatible string handling between
# Python 2 and 3 because it yields the native string type of the running
# interpreter, that is, bytes in Python 2 and unicode string in Python 3.
if six.PY2:
force_str = force_bytes
else:
force_str = force_text
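# Illustrative usage sketch:
#   force_bytes("pkg")  -> b"pkg"   (bytes on both Python 2 and 3)
#   force_text(b"pkg")  -> u"pkg"   (text on both Python 2 and 3)
#   force_str("pkg")    -> the native str type of the running interpreter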
def open_image_file(root, path, flag, mode=None):
"""Safely open files that ensures that the path we'are accessing resides
within a specified image root.
'root' is a directory that the path must reside in.
"""
try:
return os.fdopen(os.open(path, flag|os.O_NOFOLLOW, mode))
except EnvironmentError as e:
if e.errno != errno.ELOOP:
# Access to protected member; pylint: disable=W0212
raise api_errors._convert_error(e)
# If it is a symbolic link, fall back to ar_open. ar_open interprets
# 'path' as relative to 'root', that is, 'root' will be prepended to
# 'path', so we need to call os.path.relpath here.
from pkg.altroot import ar_open
return os.fdopen(ar_open(root, os.path.relpath(path, root), flag, mode))
def check_ca(cert):
"""Check if 'cert' is a proper CA. For this the BasicConstraints need to
identify it as a CA cert and it needs to have the CertSign
(key_cert_sign in Cryptography) KeyUsage flag. Based loosely on
OpenSSL's check_ca()"""
from cryptography import x509
bconst_ca = None
kuse_sign = None
for e in cert.extensions:
if isinstance(e.value, x509.BasicConstraints):
bconst_ca = e.value.ca
elif isinstance(e.value, x509.KeyUsage):
kuse_sign = e.value.key_cert_sign
return kuse_sign is not False and bconst_ca
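# Illustrative usage sketch (the certificate path is hypothetical); check_ca
# expects a cryptography x509.Certificate object:
#   from cryptography import x509
#   from cryptography.hazmat.backends import default_backend
#   with open("/tmp/ca.pem", "rb") as f:
#       cert = x509.load_pem_x509_certificate(f.read(), default_backend())
#   if check_ca(cert):
#       ...   # treat it as a CA certificate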
FILE_DESCRIPTOR_LIMIT = 4096
def set_fd_limits(printer=None):
"""Set the open file descriptor soft limit."""
if printer is None:
printer = emsg
try:
(soft, hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
soft = max(hard, FILE_DESCRIPTOR_LIMIT)
resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
except (OSError, ValueError) as e:
printer(_("unable to set open file limit to {0}; please "
"increase the open file limit using 'ulimit -n'"
" and try the requested operation again: {1}")\
.format(soft, e))
sys.exit(EXIT_OOPS)
_varcetname_re = re.compile(r"\s")
def valid_varcet_name(name):
"""Check if the variant/facet name is valid. A valid variant/facet
name cannot contain whitespace"""
return _varcetname_re.search(name) is None
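# Illustrative usage sketch:
#   valid_varcet_name("debug.osnet")  -> True
#   valid_varcet_name("bad name")     -> False   (contains whitespace)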