diff -r 18f175f98e0e -r 0b8107a40da7 tools/userland-fetch --- a/tools/userland-fetch Tue Dec 09 18:47:43 2014 +0100 +++ b/tools/userland-fetch Tue Dec 09 11:43:38 2014 -0800 @@ -19,18 +19,20 @@ # # CDDL HEADER END # -# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved. # # -# fetch.py - a file download utility +# userland-fetch - a file download utility # # A simple program similiar to wget(1), but handles local file copy, ignores # directories, and verifies file hashes. # +import errno import os import sys import shutil +import subprocess from urllib import splittype from urllib2 import urlopen import hashlib @@ -43,9 +45,51 @@ print str(message) + " (" + str(code) + ")" except: print str(e) + +def validate_signature(path, signature): + """Given paths to a file and a detached PGP signature, verify that + the signature is valid for the file. Current configuration allows for + unrecognized keys to be downloaded as necessary.""" + + # Find the root of the repo so that we can point GnuPG at the right + # configuration and keyring. + proc = subprocess.Popen(["hg", "root"], stdout=subprocess.PIPE) + proc.wait() + if proc.returncode != 0: + return False + out, err = proc.communicate() + gpgdir = os.path.join(out.strip(), "tools", ".gnupg") + + # Skip the permissions warning: none of the information here is private, + # so not having to worry about getting mercurial keeping the directory + # unreadable is just simplest. + try: + proc = subprocess.Popen(["gpg2", "--verify", + "--no-permission-warning", "--homedir", gpgdir, signature, + path], stdin=open("/dev/null"), + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + except OSError as e: + # If the executable simply couldn't be found, just skip the + # validation. + if e.errno == errno.ENOENT: + return False + raise + + proc.wait() + if proc.returncode != 0: + # Only print GnuPG's output when there was a problem. 
+ print proc.stdout.read() + return False + return True def validate(file, hash): - algorithm, hashvalue = hash.split(':') + """Given a file-like object and a hash string, verify that the hash + matches the file contents.""" + + try: + algorithm, hashvalue = hash.split(':') + except: + algorithm = "sha256" # force migration away from sha1 if algorithm == "sha1": @@ -69,6 +113,9 @@ return "%s:%s" % (algorithm, m.hexdigest()) def validate_container(filename, hash): + """Given a file path and a hash string, verify that the hash matches the + file contents.""" + try: file = open(filename, 'r') except IOError as e: @@ -78,6 +125,9 @@ def validate_payload(filename, hash): + """Given a file path and a hash string, verify that the hash matches the + payload (uncompressed content) of the file.""" + import re import gzip import bz2 @@ -101,18 +151,25 @@ return validate(file, hash) -def download(url, filename = None): +def download(url, filename=None, quiet=False): + """Download the content at the given URL to the given filename + (defaulting to the basename of the URL if not given). If 'quiet' is + True, throw away any error messages. 
Returns the name of the file to + which the content was downloaded.""" + src = None try: src = urlopen(url) except IOError as e: - printIOError(e, "Can't open url " + url) + if not quiet: + printIOError(e, "Can't open url " + url) return None # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessfull action if 3 <= int(src.getcode()/100) <= 5: - print "Error code: " + str(src.getcode()) + if not quiet: + print "Error code: " + str(src.getcode()) return None if filename == None: @@ -121,7 +178,8 @@ try: dst = open(filename, 'wb'); except IOError as e: - printIOError(e, "Can't open file " + filename + " for writing") + if not quiet: + printIOError(e, "Can't open file " + filename + " for writing") src.close() return None @@ -138,6 +196,12 @@ return filename def download_paths(search, filename, url): + """Returns a list of URLs where the file 'filename' might be found, + using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look. + + If 'filename' is None, then the list will simply contain 'url'. + """ + urls = list() if filename != None: @@ -160,8 +224,52 @@ return urls +def download_from_paths(search_list, file_arg, url, link_arg, quiet=False): + """Attempts to download a file from a number of possible locations. + Generates a list of paths where the file ends up on the local + filesystem. This is a generator because while a download might be + successful, the signature or hash may not validate, and the caller may + want to try again from the next location. The 'link_arg' argument is a + boolean which, when True, specifies that if the source is not a remote + URL and not already found where it should be, to make a symlink to the + source rather than copying it. + """ + for url in download_paths(search_list, file_arg, url): + if not quiet: + print "Source %s..." 
% url, + + scheme, path = splittype(url) + name = file_arg + + if scheme in [ None, 'file' ]: + if os.path.exists(path) == False: + if not quiet: + print "not found, skipping file copy" + continue + elif name and name != path: + if link_arg == False: + if not quiet: + print "\n copying..." + shutil.copy2(path, name) + else: + if not quiet: + print "\n linking..." + os.symlink(path, name) + elif scheme in [ 'http', 'https', 'ftp' ]: + if not quiet: + print "\n downloading...", + name = download(url, file_arg, quiet) + if name == None: + if not quiet: + print "failed" + continue + + yield name + def usage(): - print "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] [-s|--search (search-dir)] --url (url)" % (sys.argv[0].split('/')[-1]) + print "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] " \ + "[-s|--search (search-dir)] [-S|--sigurl (signature-url)] --url (url)" % \ + (sys.argv[0].split('/')[-1]) sys.exit(1) def main(): @@ -174,11 +282,12 @@ link_arg = False hash_arg = None url_arg = None + sig_arg = None search_list = list() try: - opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:u:", - ["file=", "link", "hash=", "search=", "url="]) + opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:S:u:", + ["file=", "link", "hash=", "search=", "sigurl=", "url="]) except getopt.GetoptError, err: print str(err) usage() @@ -192,6 +301,8 @@ hash_arg = arg elif opt in [ "-s", "--search" ]: search_list.append(arg) + elif opt in [ "-S", "--sigurl" ]: + sig_arg = arg elif opt in [ "-u", "--url" ]: url_arg = arg else: @@ -200,56 +311,69 @@ if url_arg == None: usage() - for url in download_paths(search_list, file_arg, url_arg): - print "Source %s..." 
% url, - - scheme, path = splittype(url) - name = file_arg + for name in download_from_paths(search_list, file_arg, url_arg, link_arg): + print "\n validating signature...", - if scheme in [ None, 'file' ]: - if os.path.exists(path) == False: - print "not found, skipping file copy" - continue - elif name != path: - if link_arg == False: - print "\n copying..." - shutil.copy2(path, name) + sig_valid = False + if not sig_arg: + print "skipping (no signature URL)" + else: + # Put the signature file in the same directory as the + # file we're downloading. + sig_file = os.path.join( + os.path.dirname(file_arg), + os.path.basename(sig_arg)) + # Validate with the first signature we find. + for sig_file in download_from_paths(search_list, sig_file, + sig_arg, link_arg, True): + if sig_file: + if validate_signature(name, sig_file): + print "ok" + sig_valid = True + else: + print "failed" + break else: - print "\n linking..." - os.symlink(path, name) + continue else: - pass - elif scheme in [ 'http', 'https', 'ftp' ]: - print "\n downloading...", - name = download(url, file_arg) - if name == None: - print "failed" - continue + print "failed (couldn't fetch signature)" + + print " validating hash...", + realhash = validate_container(name, hash_arg) - print "\n validating...", - if hash_arg == None: + if not hash_arg: print "skipping (no hash)" - sys.exit(0) - - realhash = validate_container(name, hash_arg) - if realhash == hash_arg: + print "hash is: %s" % realhash + elif realhash == hash_arg: print "ok" - sys.exit(0) else: payloadhash = validate_payload(name, hash_arg) if payloadhash == hash_arg: print "ok" - sys.exit(0) - print "corruption detected" - print " expected: %s" % hash_arg - print " actual: %s" % realhash - print " payload: %s" % payloadhash + else: + # If the signature validated, then we assume + # that the expected hash is just a typo, but we + # warn just in case. + if sig_valid: + print "invalid hash!" 
+ else: + print "corruption detected" - try: - os.remove(name) - except OSError: - pass + print " expected: %s" % hash_arg + print " actual: %s" % realhash + print " payload: %s" % payloadhash + # An invalid hash shouldn't cause us to remove + # the target file if the signature was valid. + if not sig_valid: + try: + os.remove(name) + except OSError: + pass + + continue + + sys.exit(0) sys.exit(1) if __name__ == "__main__":