userland-fetch revision 212
98N/A#!/usr/bin/python2.6
98N/A#
1422N/A# CDDL HEADER START
98N/A#
98N/A# The contents of this file are subject to the terms of the
919N/A# Common Development and Distribution License (the "License").
919N/A# You may not use this file except in compliance with the License.
919N/A#
919N/A# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
919N/A# or http://www.opensolaris.org/os/licensing.
919N/A# See the License for the specific language governing permissions
919N/A# and limitations under the License.
919N/A#
919N/A# When distributing Covered Code, include this CDDL HEADER in each
919N/A# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
919N/A# If applicable, add the following below this CDDL HEADER, with the
919N/A# fields enclosed by brackets "[]" replaced with your own identifying
919N/A# information: Portions Copyright [yyyy] [name of copyright owner]
919N/A#
919N/A# CDDL HEADER END
919N/A#
919N/A# Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
98N/A#
98N/A#
# fetch.py - a file download utility
#
# A simple program similar to wget(1), but handles local file copy, ignores
# directories, and verifies file hashes.
970N/A#
970N/A
970N/Aimport os
1360N/Aimport sys
1360N/Aimport shutil
970N/Afrom urllib import splittype, urlopen
970N/Aimport hashlib
970N/A
def printIOError(e, txt):
    """Decode and print an IOError type exception.

    e   -- the exception that was caught (normally an IOError).
    txt -- caller-supplied description of the operation that failed.

    Prints "I/O Error: <txt>: " on one line.  When the exception carries
    exactly two arguments they are taken as (code, message) and printed
    as "<message> (<code>)"; otherwise str(e) is printed instead.
    """
    print("I/O Error: " + txt + ": ")
    # Use the exception's args tuple rather than iterating the exception
    # object itself; fall back to the object for non-exception values.
    args = getattr(e, 'args', e)
    try:
        (code, message) = args
    except (TypeError, ValueError):
        # Not a two-element value: print it verbatim.  Only unpacking
        # failures are caught, so KeyboardInterrupt and SystemExit are
        # no longer swallowed here.
        print(str(e))
    else:
        print(str(message) + " (" + str(code) + ")")
98N/A
def validate(file, hash):
    """Compute the digest of an open file with the algorithm named in hash.

    file -- an object with a read(size) method returning the data to hash.
    hash -- a string of the form "algorithm:hexdigest"; only the algorithm
            part is used here, the comparison is left to the caller.

    Returns "algorithm:hexdigest" for the data that was read, or False when
    hashlib does not know the algorithm.  If reading raises IOError the
    error is printed and the digest of the data read so far is returned.
    """
    # partition() accepts values with no ':' or with more than one, where
    # unpacking the result of split(':') would raise ValueError.
    algorithm = hash.partition(':')[0]
    try:
        m = hashlib.new(algorithm)
    except ValueError:
        return False

    # Read in bounded chunks so large archives are not held in memory.
    blocksize = 64 * 1024
    while True:
        try:
            block = file.read(blocksize)
        except IOError as err:
            # Trailing comma: this file targets Python 2, where it makes
            # the print statement stay on the current output line.
            print(str(err)),
            break

        if not block:
            break
        m.update(block)

    return "%s:%s" % (algorithm, m.hexdigest())
1262N/A
def validate_container(filename, hash):
    """Hash the file itself (the container) and return the computed hash.

    filename -- path of the file to hash.
    hash     -- expected "algorithm:hexdigest" string, passed to validate().

    Returns whatever validate() returns, or False if the file cannot be
    opened (the I/O error is printed).
    """
    try:
        # Binary mode: the digest must be taken over the raw bytes.
        file = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False
    try:
        return validate(file, hash)
    finally:
        # Release the handle even if hashing raises.
        file.close()
970N/A
98N/A
def validate_payload(filename, hash):
    """Hash the decompressed contents (the payload) of a .bz2 or .gz file.

    filename -- path of the archive; the compression type is chosen from
                the file name suffix, case-insensitively.
    hash     -- expected "algorithm:hexdigest" string, passed to validate().

    Returns whatever validate() returns, or False when the name has neither
    suffix or the archive cannot be opened (the I/O error is printed).
    """
    import re
    import gzip
    import bz2

    # Raw strings keep the backslash in the pattern explicit.
    expr_bz = re.compile(r'.+\.bz2$', re.IGNORECASE)
    expr_gz = re.compile(r'.+\.gz$', re.IGNORECASE)

    try:
        if expr_bz.match(filename):
            file = bz2.BZ2File(filename, 'r')
        elif expr_gz.match(filename):
            file = gzip.GzipFile(filename, 'r')
        else:
            return False
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False
    try:
        return validate(file, hash)
    finally:
        # Release the handle even if hashing raises.
        file.close()
98N/A
98N/A
def download(url, filename = None):
    """Fetch url with urlopen() and write the data to a local file.

    url      -- the location to fetch.
    filename -- destination path; when None, the last '/'-separated
                component of the final URL is used.

    Returns the name of the file written, or None if the URL cannot be
    opened, the response carries a 3xx/4xx/5xx status code, or the
    destination cannot be opened for writing (a message is printed in
    each of those cases).
    """
    src = None

    try:
        src = urlopen(url)
    except IOError as e:
        printIOError(e, "Can't open url " + url)
        return None

    # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action.
    # urllib documents getcode() as returning None when the URL is not an
    # HTTP URL, so only classify a status code that is actually present.
    code = src.getcode()
    if code is not None and 3 <= code // 100 <= 5:
        print("Error code: " + str(code))
        src.close()
        return None

    if filename is None:
        filename = src.geturl().split('/')[-1]

    try:
        dst = open(filename, 'wb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename + " for writing")
        src.close()
        return None

    # Copy in bounded chunks and always release both handles, even when
    # the transfer fails part-way through.
    blocksize = 64 * 1024
    try:
        while True:
            block = src.read(blocksize)
            if not block:
                break
            dst.write(block)
    finally:
        src.close()
        dst.close()

    # return the name of the file that we downloaded the data to.
    return filename

def download_paths(search, filename, url):
    """Build the ordered list of locations to try for a file.

    search   -- list of base locations; not modified.
    filename -- local file name, or None.
    url      -- fallback URL from the command line, or None.

    When filename is given the result starts with filename itself,
    followed by base + '/' + basename(filename) for every entry of search
    and every whitespace-separated entry of the DOWNLOAD_SEARCH_PATH
    environment variable.  url is appended last unless it is None or
    already in the list.
    """
    urls = list()

    if filename is not None:
        # Work on a copy so the caller's list is not extended in place.
        bases = list(search)

        tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
        if tmp:
            # split() without an argument drops the empty fields that
            # repeated spaces would otherwise turn into "/<file>" entries.
            bases += tmp.split()

        basename = os.path.basename(filename)
        urls = [base + '/' + basename for base in bases]

        # filename should always be first
        if filename in urls:
            urls.remove(filename)
        urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        urls.append(url)

    return urls

def usage():
    """Print the command line synopsis and exit with status 1."""
    # Program name: whatever follows the last '/' in argv[0].
    progname = sys.argv[0].rsplit('/', 1)[-1]
    synopsis = ("Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] "
                "[-s|--search (search-dir)] --url (url)")
    print(synopsis % progname)
    sys.exit(1)

def main():
    """Parse the command line, then fetch and verify the requested file.

    Every location returned by download_paths() is tried in order: local
    paths (no scheme or 'file') are copied or symlinked to the --file
    name, http/https/ftp URLs are downloaded.  The result is checked
    against --hash, first as-is and then as a decompressed payload.  The
    process exits 0 on the first match (or when no hash was given) and 1
    once every location has been tried without success.
    """
    # NOTE(review): block nesting below was reconstructed from a source
    # whose indentation had been stripped - confirm against the original.
    import getopt

    # FLUSH STDOUT
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    file_arg = None
    link_arg = False
    hash_arg = None
    url_arg = None
    search_list = list()

    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:u:",
            ["file=", "link", "hash=", "search=", "url="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()

    for opt, arg in opts:
        if opt in ("-f", "--file"):
            file_arg = arg
        elif opt in ("-l", "--link"):
            link_arg = True
        elif opt in ("-h", "--hash"):
            hash_arg = arg
        elif opt in ("-s", "--search"):
            search_list.append(arg)
        elif opt in ("-u", "--url"):
            url_arg = arg
        else:
            assert False, "unknown option"

    if url_arg is None:
        usage()

    # A trailing comma after print(...) is the Python 2 print statement's
    # "no newline" form; it keeps the progress text on one line.
    for url in download_paths(search_list, file_arg, url_arg):
        print("Source %s..." % url),

        scheme, path = splittype(url)
        name = file_arg

        if scheme in (None, 'file'):
            if not os.path.exists(path):
                print("not found, skipping file copy")
                continue
            # When the source already is the target, use it in place.
            if name != path:
                if link_arg:
                    print("\n linking...")
                    os.symlink(path, name)
                else:
                    print("\n copying...")
                    shutil.copy2(path, name)
        elif scheme in ('http', 'https', 'ftp'):
            print("\n downloading..."),
            name = download(url, file_arg)

        if name is None:
            print("failed")
            continue

        print("\n validating..."),
        if hash_arg is None:
            print("skipping (no hash)")
            sys.exit(0)

        realhash = validate_container(name, hash_arg)
        if realhash == hash_arg:
            print("ok")
            sys.exit(0)

        # The container did not match; try the decompressed payload.
        payloadhash = validate_payload(name, hash_arg)
        if payloadhash == hash_arg:
            print("ok")
            sys.exit(0)

        print("corruption detected")
        print(" expected: %s" % hash_arg)
        print(" actual: %s" % realhash)
        print(" payload: %s" % payloadhash)

        # Discard the bad copy before moving on to the next source.
        try:
            os.remove(name)
        except OSError:
            pass

    sys.exit(1)

if __name__ == "__main__":
    main()