userland-fetch revision 135
18N/A#!/usr/bin/python2.6
18N/A#
18N/A# CDDL HEADER START
18N/A#
18N/A# The contents of this file are subject to the terms of the
18N/A# Common Development and Distribution License (the "License").
18N/A# You may not use this file except in compliance with the License.
18N/A#
18N/A# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
18N/A# or http://www.opensolaris.org/os/licensing.
18N/A# See the License for the specific language governing permissions
18N/A# and limitations under the License.
18N/A#
18N/A# When distributing Covered Code, include this CDDL HEADER in each
18N/A# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
18N/A# If applicable, add the following below this CDDL HEADER, with the
18N/A# fields enclosed by brackets "[]" replaced with your own identifying
18N/A# information: Portions Copyright [yyyy] [name of copyright owner]
18N/A#
18N/A# CDDL HEADER END
18N/A#
58N/A# Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
18N/A#
18N/A#
18N/A# fetch.py - a file download utility
18N/A#
18N/A# A simple program similar to wget(1), but handles local file copy, ignores
18N/A# directories, and verifies file hashes.
18N/A#
18N/A
18N/Aimport os
18N/Aimport sys
42N/Aimport shutil
42N/Afrom urllib import splittype, urlopen
135N/Aimport hashlib
18N/A
def validate(file, hash):
    """Hash the contents of an open file object.

    'hash' is an "algorithm:hexdigest" string; only the algorithm part
    is used here.  Returns "algorithm:hexdigest-of-file" on success so
    the caller can compare it against the expected value, or False when
    the hash string is malformed or names an unknown algorithm.
    """
    try:
        # splitting inside the try also catches a hash string with no
        # "algorithm:" prefix instead of raising to the caller
        algorithm = hash.split(':', 1)[0]
        m = hashlib.new(algorithm)
    except ValueError:
        return False

    # digest in fixed-size chunks so arbitrarily large archives don't
    # have to fit in memory
    while True:
        block = file.read(65536)
        if not block:
            break
        m.update(block)

    return "%s:%s" % (algorithm, m.hexdigest())
18N/A
def validate_container(filename, hash):
    """Hash the named file as-is (the downloaded container).

    Returns False if the file cannot be opened, otherwise whatever
    validate() returns ("algorithm:hexdigest" or False).
    """
    try:
        # binary mode: we are hashing raw bytes, not text
        file = open(filename, 'rb')
    except IOError:
        return False
    try:
        return validate(file, hash)
    finally:
        # don't leak the descriptor, even if validate() raises
        file.close()
135N/A
def validate_payload(filename, hash):
    """Hash the decompressed payload of a .bz2 or .gz archive.

    Some upstream hashes are published for the uncompressed tarball
    rather than the compressed container.  Returns False when the
    filename has neither suffix or cannot be opened, otherwise
    whatever validate() returns.
    """
    import re
    import gzip
    import bz2

    # raw strings so the backslash escapes reach the regex engine intact
    expr_bz = re.compile(r'.+\.bz2$', re.IGNORECASE)
    expr_gz = re.compile(r'.+\.gz$', re.IGNORECASE)

    try:
        if expr_bz.match(filename):
            # bug fix: the class is BZ2File, not BZFile
            file = bz2.BZ2File(filename, 'r')
        elif expr_gz.match(filename):
            file = gzip.GzipFile(filename, 'r')
        else:
            return False
    except IOError:
        return False
    try:
        return validate(file, hash)
    finally:
        file.close()
135N/A
def download(url, filename = None):
    """Download 'url' into 'filename'.

    When 'filename' is None the last component of the (post-redirect)
    URL is used.  Returns the name of the file written, or None if the
    URL could not be opened or the destination could not be created.
    """
    try:
        src = urlopen(url)
    except IOError:
        return None

    if filename is None:
        # geturl() reflects any redirects, so this is the final name
        filename = src.geturl().split('/')[-1]

    try:
        dst = open(filename, 'wb')
    except IOError:
        src.close()
        return None

    # copy in fixed-size chunks; close both ends even if a read or
    # write fails part-way through
    try:
        while True:
            block = src.read(65536)
            if not block:
                break
            dst.write(block)
    finally:
        src.close()
        dst.close()

    # return the name of the file that we downloaded the data to.
    return filename
18N/A
def download_paths(search, filename, url):
    """Build the ordered list of candidate sources for 'filename'.

    'search' is a list of base URLs/directories; $DOWNLOAD_SEARCH_PATH
    (space-separated) extends it.  The local 'filename' is always first
    and the explicit 'url' is the last-resort fallback.  The 'search'
    argument is not modified.
    """
    urls = list()

    if filename is not None:
        # copy so the environment additions don't mutate the caller's list
        bases = list(search)
        tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
        if tmp:
            bases += tmp.split(' ')

        file = os.path.basename(filename)

        urls = [base + '/' + file for base in bases]

        # filename should always be first
        if filename in urls:
            urls.remove(filename)
        urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        urls.append(url)

    return urls
18N/A
def usage():
    # Show the invocation summary on stdout and abort with exit status 1.
    prog = sys.argv[0].split('/')[-1]
    print("Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] [-s|--search (search-dir)] --url (url)" % (prog))
    sys.exit(1)
18N/A
def main():
    """Command-line driver: parse options, try each candidate source in
    order, and (optionally) verify the result against a hash."""
    import getopt

    # FLUSH STDOUT
    # reopen stdout unbuffered so progress messages appear immediately
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    file_arg = None        # -f/--file: local destination path
    link_arg = False       # -l/--link: symlink local sources instead of copying
    hash_arg = None        # -h/--hash: expected "algorithm:hexdigest" value
    url_arg = None         # -u/--url: fallback download URL
    search_list = list()   # -s/--search: base locations to try first

    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:u:",
            ["file=", "link", "hash=", "search=", "url="])
    except getopt.GetoptError, err:
        print str(err)
        usage()

    for opt, arg in opts:
        if opt in [ "-f", "--file" ]:
            file_arg = arg
        elif opt in [ "-l", "--link" ]:
            link_arg = True
        elif opt in [ "-h", "--hash" ]:
            hash_arg = arg
        elif opt in [ "-s", "--search" ]:
            search_list.append(arg)
        elif opt in [ "-u", "--url" ]:
            url_arg = arg
        else:
            assert False, "unknown option"

    # a URL is the only mandatory argument
    if url_arg == None:
        usage()

    # try candidates in order; the first one that validates wins (the
    # sys.exit(0) calls below end the loop on success)
    for url in download_paths(search_list, file_arg, url_arg):
        print "Source %s..." % url,

        scheme, path = splittype(url)
        name = file_arg

        if scheme in [ None, 'file' ]:
            # local source: copy or symlink it into place
            if os.path.exists(path) == False:
                print "not found, skipping file copy"
                continue
            elif name != path:
                if link_arg == False:
                    print "\n copying..."
                    shutil.copy2(path, name)
                else:
                    print "\n linking..."
                    os.symlink(path, name)
            else:
                # source and destination are the same file: nothing to do
                pass
        elif scheme in [ 'http', 'https', 'ftp' ]:
            print "\n downloading...",
            name = download(url, file_arg)
            if name == None:
                print "failed"
                continue

        print "\n validating...",
        if hash_arg == None:
            # no hash supplied: accept the first retrievable source
            print "skipping (no hash)"
            sys.exit(0)

        # first hash the file as-is; if that fails, hash the
        # decompressed payload (some hashes cover the uncompressed data)
        realhash = validate_container(name, hash_arg)
        if realhash == hash_arg:
            print "ok"
            sys.exit(0)
        else:
            payloadhash = validate_payload(name, hash_arg)
            if payloadhash == hash_arg:
                print "ok"
                sys.exit(0)
            print "corruption detected"
            print " expected: %s" % hash_arg
            print " actual: %s" % realhash
            print " payload: %s" % payloadhash

        # remove the corrupt file before trying the next source
        try:
            os.remove(name)
        except OSError:
            pass

    # every candidate failed or failed validation
    sys.exit(1)
18N/A
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()