userland-fetch revision 5680
3770N/A#!/usr/bin/python2.7
18N/A#
18N/A# CDDL HEADER START
18N/A#
18N/A# The contents of this file are subject to the terms of the
18N/A# Common Development and Distribution License (the "License").
18N/A# You may not use this file except in compliance with the License.
18N/A#
18N/A# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
18N/A# or http://www.opensolaris.org/os/licensing.
18N/A# See the License for the specific language governing permissions
18N/A# and limitations under the License.
18N/A#
18N/A# When distributing Covered Code, include this CDDL HEADER in each
18N/A# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
18N/A# If applicable, add the following below this CDDL HEADER, with the
18N/A# fields enclosed by brackets "[]" replaced with your own identifying
18N/A# information: Portions Copyright [yyyy] [name of copyright owner]
18N/A#
18N/A# CDDL HEADER END
18N/A#
5680N/A
5680N/A#
5242N/A# Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
18N/A#
18N/A#
3533N/A# userland-fetch - a file download utility
18N/A#
18N/A# A simple program similar to wget(1), but handles local file copy, ignores
18N/A# directories, and verifies file hashes.
18N/A#
18N/A
3533N/Aimport errno
18N/Aimport os
18N/Aimport sys
42N/Aimport shutil
3533N/Aimport subprocess
888N/Afrom urllib import splittype
888N/Afrom urllib2 import urlopen
135N/Aimport hashlib
18N/A
def printIOError(e, txt):
    """Decode and print an IOError-type exception.

    Prints a header line built from 'txt', followed by a second line with
    the details of the exception 'e'.  When the exception carries exactly
    two arguments (conventionally an error code and a message) they are
    shown as "message (code)"; otherwise the string form of the exception
    is printed.
    """
    print("I/O Error: " + txt + ": ")
    try:
        # Unpack the argument tuple explicitly rather than the exception
        # object itself; the result is the same for two-argument errors
        # and it does not depend on exceptions being indexable.
        (code, message) = e.args
    except (AttributeError, TypeError, ValueError):
        # Not a (code, message) style error: fall back to its string form.
        print(str(e))
    else:
        print(str(message) + " (" + str(code) + ")")
3533N/A
def validate_signature(path, signature):
    """Given paths to a file and a detached PGP signature, verify that
    the signature is valid for the file.

    Returns True only when "gpg2 --verify" exits with status 0.  Returns
    False when the repository root cannot be determined, when either the
    "hg" or "gpg2" executable cannot be found, or when verification fails
    (in which case GnuPG's output is printed).

    Per the original author's note, the keyring configuration allows
    unrecognized keys to be downloaded as necessary; that is governed by
    the GnuPG home directory used below, not by this function."""

    # Find the root of the repo so that we can point GnuPG at the right
    # configuration and keyring.
    try:
        proc = subprocess.Popen(["hg", "root"], stdout=subprocess.PIPE)
    except OSError as e:
        # Treat a missing "hg" the same way as a missing "gpg2" below:
        # the signature simply cannot be validated.
        if e.errno == errno.ENOENT:
            return False
        raise
    # communicate() drains the pipe while waiting; calling wait() first
    # can deadlock once the child fills the pipe buffer.
    out, _ = proc.communicate()
    if proc.returncode != 0:
        return False
    gpgdir = os.path.join(out.strip(), "tools", ".gnupg")

    # Skip the permissions warning: none of the information here is private,
    # so not having to worry about getting mercurial keeping the directory
    # unreadable is just simplest.
    devnull = open(os.devnull)
    try:
        try:
            proc = subprocess.Popen(["gpg2", "--verify",
                "--no-permission-warning", "--homedir", gpgdir, signature,
                path], stdin=devnull,
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except OSError as e:
            # If the executable simply couldn't be found, just skip the
            # validation.
            if e.errno == errno.ENOENT:
                return False
            raise
        output, _ = proc.communicate()
    finally:
        devnull.close()

    if proc.returncode != 0:
        # Only print GnuPG's output when there was a problem.
        print(output)
        return False
    return True
5242N/A
def validate(file, hash):
    """Given a file-like object and a hash string, compute the digest of
    the file contents using the algorithm named in the hash string.

    'hash' has the form "algorithm:hexdigest".  Only the algorithm part is
    used here; if 'hash' is None or not of that form, sha256 is assumed,
    and sha1 is always replaced by sha256.

    Returns the string "algorithm:hexdigest" for the data read, which the
    caller compares against the expected hash, or False if hashlib does
    not support the requested algorithm.  If reading fails part-way, the
    error is written to stdout and the digest of the data read so far is
    returned."""

    try:
        algorithm, _ = hash.split(':')
    except (AttributeError, ValueError):
        # No hash given, or not in "algorithm:value" form.
        algorithm = "sha256"

    # force migration away from sha1
    if algorithm == "sha1":
        algorithm = "sha256"
    try:
        m = hashlib.new(algorithm)
    except ValueError:
        return False

    # Read in bounded chunks so large archives are never held in memory
    # all at once.
    blocksize = 64 * 1024
    while True:
        try:
            block = file.read(blocksize)
        except (IOError, EOFError) as err:
            # EOFError covers decompressors that hit a truncated archive.
            # The trailing space keeps this on the same output line as
            # whatever the caller prints next.
            sys.stdout.write(str(err) + " ")
            break

        # An empty read ('' or b'') means end of file.
        if not block:
            break
        m.update(block)

    return "%s:%s" % (algorithm, m.hexdigest())
18N/A
def validate_container(filename, hash):
    """Given a file path and a hash string, compute the hash of the file
    contents as stored on disk.

    Returns whatever validate() returns for the opened file, or False
    (after printing the error) if the file cannot be opened."""

    try:
        # Binary mode: the digest must cover the exact bytes on disk.
        container = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False
    try:
        return validate(container, hash)
    finally:
        # Always release the handle, even if hashing raises.
        container.close()
135N/A
135N/A
def validate_payload(filename, hash):
    """Given a file path and a hash string, compute the hash of the
    payload (uncompressed content) of the file.

    Only names ending in .bz2, .gz or .tgz (case-insensitive) are
    handled.  Returns whatever validate() returns for the decompressed
    stream, or False if the name has no recognized suffix or the archive
    cannot be opened (the error is printed in the latter case)."""

    import re
    import gzip
    import bz2

    expr_bz = re.compile(r'.+\.bz2$', re.IGNORECASE)
    # .gz and .tgz are both gzip streams, so one pattern covers them.
    expr_gz = re.compile(r'.+\.t?gz$', re.IGNORECASE)

    try:
        if expr_bz.match(filename):
            payload = bz2.BZ2File(filename, 'r')
        elif expr_gz.match(filename):
            payload = gzip.GzipFile(filename, 'r')
        else:
            return False
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False
    try:
        return validate(payload, hash)
    finally:
        # Always release the decompressor's file handle.
        payload.close()
135N/A
135N/A
def download(url, timeout, filename=None, quiet=False):
    """Download the content at the given URL to the given filename
    (defaulting to the basename of the URL if not given).  'timeout' is
    passed to urlopen().  If 'quiet' is True, throw away any error
    messages.

    Returns the name of the file to which the content was downloaded, or
    None if the URL could not be opened, the server reported a 3xx, 4xx or
    5xx status, the destination could not be opened for writing, or the
    transfer failed part-way (the partial file is then removed)."""

    try:
        src = urlopen(url=url, timeout=timeout)
    except IOError as e:
        if not quiet:
            printIOError(e, "Can't open url " + url)
        return None

    try:
        # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action.
        # Responses without a status code report None, which must not be
        # treated as a failure (or divided by 100).
        code = src.getcode()
        if code is not None and 3 <= code // 100 <= 5:
            if not quiet:
                print("Error code: " + str(code))
            return None

        if filename is None:
            filename = src.geturl().split('/')[-1]

        try:
            dst = open(filename, 'wb')
        except IOError as e:
            if not quiet:
                printIOError(e, "Can't open file " + filename +
                    " for writing")
            return None

        # Copy in bounded chunks so large downloads are never held in
        # memory all at once.
        blocksize = 64 * 1024
        failure = None
        try:
            while True:
                block = src.read(blocksize)
                if not block:
                    break
                dst.write(block)
        except IOError as e:
            failure = e
        finally:
            dst.close()

        if failure is not None:
            # Report the failed transfer and discard the partial file so
            # the caller can move on to the next location.
            if not quiet:
                printIOError(failure, "Can't download " + url)
            try:
                os.remove(filename)
            except OSError:
                pass
            return None
    finally:
        src.close()

    # return the name of the file that we downloaded the data to.
    return filename
18N/A
def download_paths(search, filename, url):
    """Returns a list of URLs where the file 'filename' might be found,
    using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.

    The result starts with 'filename' itself, followed by the basename of
    'filename' under each entry of 'search' and then under each
    whitespace-separated entry of $DOWNLOAD_SEARCH_PATH, and ends with
    'url' (if given and not already present).

    If 'filename' is None, then the list will simply contain 'url'.

    The 'search' list passed in is not modified.
    """

    urls = list()

    if filename is not None:
        # Work on a copy: extending the caller's list in place would make
        # the environment entries pile up on every call.
        bases = list(search)
        env_path = os.getenv('DOWNLOAD_SEARCH_PATH')
        if env_path:
            # split() without an argument ignores repeated or stray
            # whitespace instead of producing empty base entries.
            bases += env_path.split()

        basename = os.path.basename(filename)

        # filename should always be first (and appear only once)
        urls = [ filename ]
        for base in bases:
            candidate = base + '/' + basename
            if candidate != filename:
                urls.append(candidate)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        urls.append(url)

    return urls
18N/A
5242N/Adef download_from_paths(search_list, file_arg, url, timeout_arg, link_arg, quiet=False):
3533N/A """Attempts to download a file from a number of possible locations.
3533N/A Generates a list of paths where the file ends up on the local
3533N/A filesystem. This is a generator because while a download might be
3533N/A successful, the signature or hash may not validate, and the caller may
3533N/A want to try again from the next location. The 'link_arg' argument is a
3533N/A boolean which, when True, specifies that if the source is not a remote
3533N/A URL and not already found where it should be, to make a symlink to the
3533N/A source rather than copying it.
3533N/A """
3533N/A for url in download_paths(search_list, file_arg, url):
3533N/A if not quiet:
3533N/A print "Source %s..." % url,
3533N/A
3533N/A scheme, path = splittype(url)
3533N/A name = file_arg
3533N/A
3533N/A if scheme in [ None, 'file' ]:
3533N/A if os.path.exists(path) == False:
3533N/A if not quiet:
3533N/A print "not found, skipping file copy"
3533N/A continue
3533N/A elif name and name != path:
3533N/A if link_arg == False:
3533N/A if not quiet:
3533N/A print "\n copying..."
3533N/A shutil.copy2(path, name)
3533N/A else:
3533N/A if not quiet:
3533N/A print "\n linking..."
3533N/A os.symlink(path, name)
3533N/A elif scheme in [ 'http', 'https', 'ftp' ]:
3533N/A if not quiet:
3533N/A print "\n downloading...",
5242N/A name = download(url, timeout_arg, file_arg, quiet)
3533N/A if name == None:
3533N/A if not quiet:
3533N/A print "failed"
3533N/A continue
3533N/A
3533N/A yield name
3533N/A
def usage():
    """Print the command-line synopsis and exit with status 1.

    The program name shown is the last '/'-separated component of
    sys.argv[0]."""
    program = sys.argv[0].rpartition('/')[2]
    synopsis = (
        "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] "
        "[-s|--search (search-dir)] [-S|--sigurl (signature-url)] "
        "[-t|--timeout (timeout)] --url (url)")
    print(synopsis % program)
    sys.exit(1)
18N/A
18N/Adef main():
18N/A import getopt
18N/A
5242N/A # FLUSH STDOUT
18N/A sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
18N/A
42N/A file_arg = None
42N/A link_arg = False
42N/A hash_arg = None
42N/A url_arg = None
3533N/A sig_arg = None
5242N/A timeout_arg = 300
42N/A search_list = list()
18N/A
18N/A try:
5242N/A opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:S:t:u:",
5242N/A ["file=", "link", "hash=", "search=", "sigurl=",
5242N/A "timeout=", "url="])
18N/A except getopt.GetoptError, err:
18N/A print str(err)
18N/A usage()
18N/A
18N/A for opt, arg in opts:
18N/A if opt in [ "-f", "--file" ]:
42N/A file_arg = arg
42N/A elif opt in [ "-l", "--link" ]:
42N/A link_arg = True
18N/A elif opt in [ "-h", "--hash" ]:
42N/A hash_arg = arg
18N/A elif opt in [ "-s", "--search" ]:
42N/A search_list.append(arg)
3533N/A elif opt in [ "-S", "--sigurl" ]:
3533N/A sig_arg = arg
5242N/A elif opt in [ "-t", "--timeout" ]:
5242N/A try:
5242N/A timeout_arg = int(arg)
5242N/A except ValueError:
5242N/A print "Invalid argument for %s, should be a " \
5242N/A "number, but is %s" % (opt, arg)
5242N/A sys.exit(1)
5242N/A if timeout_arg < 0:
5242N/A print "Invalid argument for %s, should be a " \
5242N/A "positive number, but is %s" % (opt, arg)
5242N/A sys.exit(1)
18N/A elif opt in [ "-u", "--url" ]:
42N/A url_arg = arg
18N/A else:
18N/A assert False, "unknown option"
18N/A
5242N/A for name in download_from_paths(search_list, file_arg, url_arg,
5242N/A timeout_arg, link_arg):
3533N/A print "\n validating signature...",
18N/A
3533N/A sig_valid = False
3533N/A if not sig_arg:
3533N/A print "skipping (no signature URL)"
3533N/A else:
3533N/A # Put the signature file in the same directory as the
3533N/A # file we're downloading.
3533N/A sig_file = os.path.join(
3533N/A os.path.dirname(file_arg),
3533N/A os.path.basename(sig_arg))
3533N/A # Validate with the first signature we find.
3533N/A for sig_file in download_from_paths(search_list, sig_file,
5242N/A sig_arg, timeout_arg, link_arg, True):
3533N/A if sig_file:
3533N/A if validate_signature(name, sig_file):
3533N/A print "ok"
3533N/A sig_valid = True
3533N/A else:
3533N/A print "failed"
3533N/A break
42N/A else:
3533N/A continue
42N/A else:
3533N/A print "failed (couldn't fetch signature)"
3533N/A
3533N/A print " validating hash...",
3533N/A realhash = validate_container(name, hash_arg)
18N/A
3533N/A if not hash_arg:
58N/A print "skipping (no hash)"
3533N/A print "hash is: %s" % realhash
3533N/A elif realhash == hash_arg:
42N/A print "ok"
18N/A else:
135N/A payloadhash = validate_payload(name, hash_arg)
135N/A if payloadhash == hash_arg:
135N/A print "ok"
3533N/A else:
3533N/A # If the signature validated, then we assume
3533N/A # that the expected hash is just a typo, but we
3533N/A # warn just in case.
3533N/A if sig_valid:
4427N/A print "invalid hash! Did you forget " \
4427N/A "to update it?"
3533N/A else:
3533N/A print "corruption detected"
18N/A
3533N/A print " expected: %s" % hash_arg
3533N/A print " actual: %s" % realhash
3533N/A print " payload: %s" % payloadhash
18N/A
4427N/A # If the hash is invalid, but the signature
4427N/A # validation succeeded, rename the archive (so
4427N/A # the user doesn't have to re-download it) and
4427N/A # fail. Otherwise, try to remove the file and
4427N/A # try again.
4427N/A if sig_valid:
4427N/A newname = name + ".invalid-hash"
4427N/A try:
4427N/A os.rename(name, newname)
4427N/A except OSError:
4427N/A pass
4427N/A else:
4427N/A print "archive saved as %s; " \
4427N/A "if it isn't corrupt, " \
4427N/A "rename to %s" % (newname,
4427N/A name)
4427N/A sys.exit(1)
4427N/A else:
3533N/A try:
3533N/A os.remove(name)
3533N/A except OSError:
3533N/A pass
3533N/A
3533N/A continue
3533N/A
3533N/A sys.exit(0)
18N/A sys.exit(1)
18N/A
# Run the fetcher only when this file is executed as a script.
if __name__ == "__main__":
    main()