tools/userland-fetch
changeset 3533 0b8107a40da7
parent 832 d0946a4ddb78
child 3770 ca450a806cc1
--- a/tools/userland-fetch	Tue Dec 09 18:47:43 2014 +0100
+++ b/tools/userland-fetch	Tue Dec 09 11:43:38 2014 -0800
@@ -19,18 +19,20 @@
 #
 # CDDL HEADER END
 #
-# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
 #
 #
-# fetch.py - a file download utility
+# userland-fetch - a file download utility
 #
 #  A simple program similiar to wget(1), but handles local file copy, ignores
 #  directories, and verifies file hashes.
 #
 
+import errno
 import os
 import sys
 import shutil
+import subprocess
 from urllib import splittype
 from urllib2 import urlopen
 import hashlib
@@ -43,9 +45,51 @@
 		print str(message) + " (" + str(code) + ")"
 	except:
 		print str(e)
+
+def validate_signature(path, signature):
+	"""Given paths to a file and a detached PGP signature, return True if
+	gpg2 accepts the signature for the file, and False if it rejects it,
+	"hg root" fails, or gpg2 is missing.  Current configuration allows
+	for unrecognized keys to be downloaded as necessary."""
+
+	# Find the root of the repo so that we can point GnuPG at the right
+	# configuration and keyring.  communicate() both drains the pipe and
+	# reaps the child; calling wait() first can deadlock on a full pipe.
+	proc = subprocess.Popen(["hg", "root"], stdout=subprocess.PIPE)
+	out, err = proc.communicate()
+	if proc.returncode != 0:
+		return False
+	gpgdir = os.path.join(out.strip(), "tools", ".gnupg")
+
+	# Skip the permissions warning: nothing here is private, so the
+	# directory need not be kept unreadable by mercurial.
+	try:
+		with open("/dev/null") as devnull:
+			proc = subprocess.Popen(["gpg2", "--verify",
+			    "--no-permission-warning", "--homedir", gpgdir,
+			    signature, path], stdin=devnull,
+			    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+	except OSError as e:
+		# If gpg2 simply isn't installed, report the signature as unverified.
+		if e.errno == errno.ENOENT:
+			return False
+		raise
+
+	out, err = proc.communicate()
+	if proc.returncode != 0:
+		# Only print GnuPG's output when there was a problem.
+		print out
+		return False
+	return True
 	
 def validate(file, hash):
-	algorithm, hashvalue = hash.split(':')
+	"""Given a file-like object and a hash string, verify that the hash
+	matches the file contents."""
+
+	try:
+		algorithm, hashvalue = hash.split(':')
+	except:
+		algorithm = "sha256"
 
 	# force migration away from sha1
 	if algorithm == "sha1":
@@ -69,6 +113,9 @@
 	return "%s:%s" % (algorithm, m.hexdigest())
 
 def validate_container(filename, hash):
+	"""Given a file path and a hash string, verify that the hash matches the
+	file contents."""
+
 	try:
 		file = open(filename, 'r')
 	except IOError as e:
@@ -78,6 +125,9 @@
 
 
 def validate_payload(filename, hash):
+	"""Given a file path and a hash string, verify that the hash matches the
+	payload (uncompressed content) of the file."""
+
 	import re
 	import gzip
 	import bz2
@@ -101,18 +151,25 @@
 	return validate(file, hash)
 
 
-def download(url, filename = None):
+def download(url, filename=None, quiet=False):
+	"""Download the content at the given URL to the given filename
+	(defaulting to the basename of the URL if not given).  If 'quiet' is
+	True, throw away any error messages.  Returns the name of the file to
+	which the content was downloaded."""
+
 	src = None
 
 	try:
 		src = urlopen(url)
 	except IOError as e:
-		printIOError(e, "Can't open url " + url)
+		if not quiet:
+			printIOError(e, "Can't open url " + url)
 		return None
 
 	# 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessfull action
 	if 3 <= int(src.getcode()/100) <= 5:
-		print "Error code: " + str(src.getcode())
+		if not quiet:
+			print "Error code: " + str(src.getcode())
 		return None
 
 	if filename == None:
@@ -121,7 +178,8 @@
 	try:
 		dst = open(filename, 'wb');
 	except IOError as e:
-		printIOError(e, "Can't open file " + filename + " for writing")
+		if not quiet:
+			printIOError(e, "Can't open file " + filename + " for writing")
 		src.close()
 		return None
 
@@ -138,6 +196,12 @@
 	return filename
 
 def download_paths(search, filename, url):
+	"""Returns a list of URLs where the file 'filename' might be found,
+	using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.
+
+	If 'filename' is None, then the list will simply contain 'url'.
+	"""
+
 	urls = list()
 
 	if filename != None:
@@ -160,8 +224,52 @@
 
 	return urls
 
+def download_from_paths(search_list, file_arg, url, link_arg, quiet=False):
+	"""Generator that tries, in order, every location returned by
+	download_paths(search_list, file_arg, url) and yields the local name
+	of the file after each attempt that is not skipped.  Callers can keep
+	iterating when a yielded file fails signature or hash validation.
+	Local sources (no scheme, or 'file') that differ from 'file_arg' are
+	copied, or symlinked when 'link_arg' is True; http, https and ftp
+	sources are downloaded.  'quiet' suppresses progress messages.
+	"""
+	for candidate in download_paths(search_list, file_arg, url):
+		if not quiet:
+			print "Source %s..." % candidate,
+
+		scheme, path = splittype(candidate)
+		name = file_arg
+		is_local = scheme in (None, 'file')
+
+		if is_local and not os.path.exists(path):
+			if not quiet:
+				print "not found, skipping file copy"
+			continue
+
+		if is_local and name and name != path:
+			if link_arg != False:
+				if not quiet:
+					print "\n    linking..."
+				os.symlink(path, name)
+			else:
+				if not quiet:
+					print "\n    copying..."
+				shutil.copy2(path, name)
+		elif scheme in ('http', 'https', 'ftp'):
+			if not quiet:
+				print "\n    downloading...",
+			name = download(candidate, file_arg, quiet)
+			if name is None:
+				if not quiet:
+					print "failed"
+				continue
+
+		yield name
+
 def usage():
-	print "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] [-s|--search (search-dir)] --url (url)" % (sys.argv[0].split('/')[-1])
+	print "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] " \
+          "[-s|--search (search-dir)] [-S|--sigurl (signature-url)] --url (url)" % \
+          (sys.argv[0].split('/')[-1])
 	sys.exit(1)
 
 def main():
@@ -174,11 +282,12 @@
 	link_arg = False
 	hash_arg = None
 	url_arg = None
+	sig_arg = None
 	search_list = list()
 
 	try:
-		opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:u:",
-			["file=", "link", "hash=", "search=", "url="])
+                opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:S:u:",
+			["file=", "link", "hash=", "search=", "sigurl=", "url="])
 	except getopt.GetoptError, err:
 		print str(err)
 		usage()
@@ -192,6 +301,8 @@
 			hash_arg = arg
 		elif opt in [ "-s", "--search" ]:
 			search_list.append(arg)
+		elif opt in [ "-S", "--sigurl" ]:
+			sig_arg = arg
 		elif opt in [ "-u", "--url" ]:
 			url_arg = arg
 		else:
@@ -200,56 +311,69 @@
 	if url_arg == None:
 		usage()
 
-	for url in download_paths(search_list, file_arg, url_arg):
-		print "Source %s..." % url,
-
-		scheme, path = splittype(url)
-		name = file_arg
+	for name in download_from_paths(search_list, file_arg, url_arg, link_arg):
+		print "\n    validating signature...",
 
-		if scheme in [ None, 'file' ]:
-			if os.path.exists(path) == False:
-				print "not found, skipping file copy"
-				continue
-			elif name != path:
-				if link_arg == False:
-					print "\n    copying..."
-					shutil.copy2(path, name)
+		sig_valid = False
+		if not sig_arg:
+			print "skipping (no signature URL)"
+		else:
+			# Put the signature file in the same directory as the
+			# file we're downloading.
+			sig_file = os.path.join(
+			    os.path.dirname(file_arg),
+			    os.path.basename(sig_arg))
+			# Validate with the first signature we find.
+			for sig_file in download_from_paths(search_list, sig_file,
+			    sig_arg, link_arg, True):
+				if sig_file:
+					if validate_signature(name, sig_file):
+						print "ok"
+						sig_valid = True
+					else:
+						print "failed"
+					break
 				else:
-					print "\n    linking..."
-					os.symlink(path, name)
+					continue
 			else:
-				pass
-		elif scheme in [ 'http', 'https', 'ftp' ]:
-			print "\n    downloading...",
-			name = download(url, file_arg)
-			if name == None:
-				print "failed"
-				continue
+				print "failed (couldn't fetch signature)"
+
+		print "    validating hash...",
+		realhash = validate_container(name, hash_arg)
 
-		print "\n    validating...",
-		if hash_arg == None:
+		if not hash_arg:
 			print "skipping (no hash)"
-			sys.exit(0)
-			
-		realhash = validate_container(name, hash_arg)
-		if realhash == hash_arg:
+			print "hash is: %s" % realhash
+		elif realhash == hash_arg:
 			print "ok"
-			sys.exit(0)
 		else:
 			payloadhash = validate_payload(name, hash_arg)
 			if payloadhash == hash_arg:
 				print "ok"
-				sys.exit(0)
-			print "corruption detected"
-			print "    expected: %s" % hash_arg
-			print "    actual:   %s" % realhash
-			print "    payload:  %s" % payloadhash
+			else:
+				# If the signature validated, then we assume
+				# that the expected hash is just a typo, but we
+				# warn just in case.
+				if sig_valid:
+					print "invalid hash!"
+				else:
+					print "corruption detected"
 
-		try:
-			os.remove(name)
-		except OSError:
-			pass
+				print "    expected: %s" % hash_arg
+				print "    actual:   %s" % realhash
+				print "    payload:  %s" % payloadhash
 
+				# An invalid hash shouldn't cause us to remove
+				# the target file if the signature was valid.
+				if not sig_valid:
+					try:
+						os.remove(name)
+					except OSError:
+						pass
+
+					continue
+
+		sys.exit(0)
 	sys.exit(1)
 
 if __name__ == "__main__":