tools/userland-fetch
changeset 3533 0b8107a40da7
parent 832 d0946a4ddb78
child 3770 ca450a806cc1
equal deleted inserted replaced
3531:18f175f98e0e 3533:0b8107a40da7
    17 # fields enclosed by brackets "[]" replaced with your own identifying
    17 # fields enclosed by brackets "[]" replaced with your own identifying
    18 # information: Portions Copyright [yyyy] [name of copyright owner]
    18 # information: Portions Copyright [yyyy] [name of copyright owner]
    19 #
    19 #
    20 # CDDL HEADER END
    20 # CDDL HEADER END
    21 #
    21 #
    22 # Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
    22 # Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
    23 #
    23 #
    24 #
    24 #
    25 # fetch.py - a file download utility
    25 # userland-fetch - a file download utility
    26 #
    26 #
    27 #  A simple program similar to wget(1), but handles local file copy, ignores
    27 #  A simple program similar to wget(1), but handles local file copy, ignores
    28 #  directories, and verifies file hashes.
    28 #  directories, and verifies file hashes.
    29 #
    29 #
    30 
    30 
       
    31 import errno
    31 import os
    32 import os
    32 import sys
    33 import sys
    33 import shutil
    34 import shutil
       
    35 import subprocess
    34 from urllib import splittype
    36 from urllib import splittype
    35 from urllib2 import urlopen
    37 from urllib2 import urlopen
    36 import hashlib
    38 import hashlib
    37 
    39 
    38 def printIOError(e, txt):
    40 def printIOError(e, txt):
    41 	try:
    43 	try:
    42 		(code, message) = e
    44 		(code, message) = e
    43 		print str(message) + " (" + str(code) + ")"
    45 		print str(message) + " (" + str(code) + ")"
    44 	except:
    46 	except:
    45 		print str(e)
    47 		print str(e)
       
    48 
       
    49 def validate_signature(path, signature):
       
    50 	"""Given paths to a file and a detached PGP signature, verify that
       
    51 	the signature is valid for the file.  Current configuration allows for
       
    52 	unrecognized keys to be downloaded as necessary."""
       
    53 
       
    54 	# Find the root of the repo so that we can point GnuPG at the right
       
    55 	# configuration and keyring.
       
    56 	proc = subprocess.Popen(["hg", "root"], stdout=subprocess.PIPE)
       
    57 	proc.wait()
       
    58 	if proc.returncode != 0:
       
    59 		return False
       
    60 	out, err = proc.communicate()
       
    61 	gpgdir = os.path.join(out.strip(), "tools", ".gnupg")
       
    62 
       
    63         # Skip the permissions warning: none of the information here is private,
       
    64         # so not having to worry about getting mercurial keeping the directory
       
    65         # unreadable is just simplest.
       
    66 	try:
       
    67 		proc = subprocess.Popen(["gpg2", "--verify",
       
    68 		    "--no-permission-warning", "--homedir", gpgdir, signature,
       
    69 		    path], stdin=open("/dev/null"),
       
    70 		    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
       
    71 	except OSError as e:
       
    72 		# If the executable simply couldn't be found, just skip the
       
    73 		# validation.
       
    74 		if e.errno == errno.ENOENT:
       
    75 			return False
       
    76 		raise
       
    77 
       
    78         proc.wait()
       
    79         if proc.returncode != 0:
       
    80 		# Only print GnuPG's output when there was a problem.
       
    81                 print proc.stdout.read()
       
    82                 return False
       
    83         return True
    46 	
    84 	
    47 def validate(file, hash):
    85 def validate(file, hash):
    48 	algorithm, hashvalue = hash.split(':')
    86 	"""Given a file-like object and a hash string, verify that the hash
       
    87 	matches the file contents."""
       
    88 
       
    89 	try:
       
    90 		algorithm, hashvalue = hash.split(':')
       
    91 	except:
       
    92 		algorithm = "sha256"
    49 
    93 
    50 	# force migration away from sha1
    94 	# force migration away from sha1
    51 	if algorithm == "sha1":
    95 	if algorithm == "sha1":
    52 		algorithm = "sha256"
    96 		algorithm = "sha256"
    53 	try:
    97 	try:
    67 			break
   111 			break
    68 
   112 
    69 	return "%s:%s" % (algorithm, m.hexdigest())
   113 	return "%s:%s" % (algorithm, m.hexdigest())
    70 
   114 
    71 def validate_container(filename, hash):
   115 def validate_container(filename, hash):
       
   116 	"""Given a file path and a hash string, verify that the hash matches the
       
   117 	file contents."""
       
   118 
    72 	try:
   119 	try:
    73 		file = open(filename, 'r')
   120 		file = open(filename, 'r')
    74 	except IOError as e:
   121 	except IOError as e:
    75 		printIOError(e, "Can't open file " + filename)
   122 		printIOError(e, "Can't open file " + filename)
    76 		return False
   123 		return False
    77 	return validate(file, hash)
   124 	return validate(file, hash)
    78 
   125 
    79 
   126 
    80 def validate_payload(filename, hash):
   127 def validate_payload(filename, hash):
       
   128 	"""Given a file path and a hash string, verify that the hash matches the
       
   129 	payload (uncompressed content) of the file."""
       
   130 
    81 	import re
   131 	import re
    82 	import gzip
   132 	import gzip
    83 	import bz2
   133 	import bz2
    84 
   134 
    85 	expr_bz = re.compile('.+\.bz2$', re.IGNORECASE)
   135 	expr_bz = re.compile('.+\.bz2$', re.IGNORECASE)
    99 		printIOError(e, "Can't open archive " + filename)
   149 		printIOError(e, "Can't open archive " + filename)
   100 		return False
   150 		return False
   101 	return validate(file, hash)
   151 	return validate(file, hash)
   102 
   152 
   103 
   153 
   104 def download(url, filename = None):
   154 def download(url, filename=None, quiet=False):
       
   155 	"""Download the content at the given URL to the given filename
       
   156 	(defaulting to the basename of the URL if not given).  If 'quiet' is
       
   157 	True, throw away any error messages.  Returns the name of the file to
       
   158 	which the content was downloaded."""
       
   159 
   105 	src = None
   160 	src = None
   106 
   161 
   107 	try:
   162 	try:
   108 		src = urlopen(url)
   163 		src = urlopen(url)
   109 	except IOError as e:
   164 	except IOError as e:
   110 		printIOError(e, "Can't open url " + url)
   165 		if not quiet:
       
   166 			printIOError(e, "Can't open url " + url)
   111 		return None
   167 		return None
   112 
   168 
   113 	# 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action
   169 	# 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action
   114 	if 3 <= int(src.getcode()/100) <= 5:
   170 	if 3 <= int(src.getcode()/100) <= 5:
   115 		print "Error code: " + str(src.getcode())
   171 		if not quiet:
       
   172 			print "Error code: " + str(src.getcode())
   116 		return None
   173 		return None
   117 
   174 
   118 	if filename == None:
   175 	if filename == None:
   119 		filename = src.geturl().split('/')[-1]
   176 		filename = src.geturl().split('/')[-1]
   120 
   177 
   121 	try:
   178 	try:
   122 		dst = open(filename, 'wb');
   179 		dst = open(filename, 'wb');
   123 	except IOError as e:
   180 	except IOError as e:
   124 		printIOError(e, "Can't open file " + filename + " for writing")
   181 		if not quiet:
       
   182 			printIOError(e, "Can't open file " + filename + " for writing")
   125 		src.close()
   183 		src.close()
   126 		return None
   184 		return None
   127 
   185 
   128 	while True:
   186 	while True:
   129 		block = src.read()
   187 		block = src.read()
   136 
   194 
   137 	# return the name of the file that we downloaded the data to.
   195 	# return the name of the file that we downloaded the data to.
   138 	return filename
   196 	return filename
   139 
   197 
   140 def download_paths(search, filename, url):
   198 def download_paths(search, filename, url):
       
   199 	"""Returns a list of URLs where the file 'filename' might be found,
       
   200 	using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.
       
   201 
       
   202 	If 'filename' is None, then the list will simply contain 'url'.
       
   203 	"""
       
   204 
   141 	urls = list()
   205 	urls = list()
   142 
   206 
   143 	if filename != None:
   207 	if filename != None:
   144 		tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
   208 		tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
   145 		if tmp:
   209 		if tmp:
   158 	if url != None and url not in urls:
   222 	if url != None and url not in urls:
   159 		urls.append(url)
   223 		urls.append(url)
   160 
   224 
   161 	return urls
   225 	return urls
   162 
   226 
       
   227 def download_from_paths(search_list, file_arg, url, link_arg, quiet=False):
       
   228 	"""Attempts to download a file from a number of possible locations.
       
   229 	Generates a list of paths where the file ends up on the local
       
   230 	filesystem.  This is a generator because while a download might be
       
   231 	successful, the signature or hash may not validate, and the caller may
       
   232 	want to try again from the next location.  The 'link_arg' argument is a
       
   233 	boolean which, when True, specifies that if the source is not a remote
       
   234 	URL and not already found where it should be, to make a symlink to the
       
   235 	source rather than copying it.
       
   236 	"""
       
   237 	for url in download_paths(search_list, file_arg, url):
       
   238 		if not quiet:
       
   239 			print "Source %s..." % url,
       
   240 
       
   241 		scheme, path = splittype(url)
       
   242 		name = file_arg
       
   243 
       
   244 		if scheme in [ None, 'file' ]:
       
   245 			if os.path.exists(path) == False:
       
   246 				if not quiet:
       
   247 					print "not found, skipping file copy"
       
   248 				continue
       
   249 			elif name and name != path:
       
   250 				if link_arg == False:
       
   251 					if not quiet:
       
   252 						print "\n    copying..."
       
   253 					shutil.copy2(path, name)
       
   254 				else:
       
   255 					if not quiet:
       
   256 						print "\n    linking..."
       
   257 					os.symlink(path, name)
       
   258 		elif scheme in [ 'http', 'https', 'ftp' ]:
       
   259 			if not quiet:
       
   260 				print "\n    downloading...",
       
   261 			name = download(url, file_arg, quiet)
       
   262 			if name == None:
       
   263 				if not quiet:
       
   264 					print "failed"
       
   265 				continue
       
   266 
       
   267 		yield name
       
   268 
   163 def usage():
   269 def usage():
   164 	print "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] [-s|--search (search-dir)] --url (url)" % (sys.argv[0].split('/')[-1])
   270 	print "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] " \
       
   271           "[-s|--search (search-dir)] [-S|--sigurl (signature-url)] --url (url)" % \
       
   272           (sys.argv[0].split('/')[-1])
   165 	sys.exit(1)
   273 	sys.exit(1)
   166 
   274 
   167 def main():
   275 def main():
   168 	import getopt
   276 	import getopt
   169 
   277 
   172 
   280 
   173 	file_arg = None
   281 	file_arg = None
   174 	link_arg = False
   282 	link_arg = False
   175 	hash_arg = None
   283 	hash_arg = None
   176 	url_arg = None
   284 	url_arg = None
       
   285 	sig_arg = None
   177 	search_list = list()
   286 	search_list = list()
   178 
   287 
   179 	try:
   288 	try:
   180 		opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:u:",
   289                 opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:S:u:",
   181 			["file=", "link", "hash=", "search=", "url="])
   290 			["file=", "link", "hash=", "search=", "sigurl=", "url="])
   182 	except getopt.GetoptError, err:
   291 	except getopt.GetoptError, err:
   183 		print str(err)
   292 		print str(err)
   184 		usage()
   293 		usage()
   185 
   294 
   186 	for opt, arg in opts:
   295 	for opt, arg in opts:
   190 			link_arg = True
   299 			link_arg = True
   191 		elif opt in [ "-h", "--hash" ]:
   300 		elif opt in [ "-h", "--hash" ]:
   192 			hash_arg = arg
   301 			hash_arg = arg
   193 		elif opt in [ "-s", "--search" ]:
   302 		elif opt in [ "-s", "--search" ]:
   194 			search_list.append(arg)
   303 			search_list.append(arg)
       
   304 		elif opt in [ "-S", "--sigurl" ]:
       
   305 			sig_arg = arg
   195 		elif opt in [ "-u", "--url" ]:
   306 		elif opt in [ "-u", "--url" ]:
   196 			url_arg = arg
   307 			url_arg = arg
   197 		else:
   308 		else:
   198 			assert False, "unknown option"
   309 			assert False, "unknown option"
   199 
   310 
   200 	if url_arg == None:
   311 	if url_arg == None:
   201 		usage()
   312 		usage()
   202 
   313 
   203 	for url in download_paths(search_list, file_arg, url_arg):
   314 	for name in download_from_paths(search_list, file_arg, url_arg, link_arg):
   204 		print "Source %s..." % url,
   315 		print "\n    validating signature...",
   205 
   316 
   206 		scheme, path = splittype(url)
   317 		sig_valid = False
   207 		name = file_arg
   318 		if not sig_arg:
   208 
   319 			print "skipping (no signature URL)"
   209 		if scheme in [ None, 'file' ]:
   320 		else:
   210 			if os.path.exists(path) == False:
   321 			# Put the signature file in the same directory as the
   211 				print "not found, skipping file copy"
   322 			# file we're downloading.
   212 				continue
   323 			sig_file = os.path.join(
   213 			elif name != path:
   324 			    os.path.dirname(file_arg),
   214 				if link_arg == False:
   325 			    os.path.basename(sig_arg))
   215 					print "\n    copying..."
   326 			# Validate with the first signature we find.
   216 					shutil.copy2(path, name)
   327 			for sig_file in download_from_paths(search_list, sig_file,
       
   328 			    sig_arg, link_arg, True):
       
   329 				if sig_file:
       
   330 					if validate_signature(name, sig_file):
       
   331 						print "ok"
       
   332 						sig_valid = True
       
   333 					else:
       
   334 						print "failed"
       
   335 					break
   217 				else:
   336 				else:
   218 					print "\n    linking..."
   337 					continue
   219 					os.symlink(path, name)
       
   220 			else:
   338 			else:
   221 				pass
   339 				print "failed (couldn't fetch signature)"
   222 		elif scheme in [ 'http', 'https', 'ftp' ]:
   340 
   223 			print "\n    downloading...",
   341 		print "    validating hash...",
   224 			name = download(url, file_arg)
   342 		realhash = validate_container(name, hash_arg)
   225 			if name == None:
   343 
   226 				print "failed"
   344 		if not hash_arg:
   227 				continue
       
   228 
       
   229 		print "\n    validating...",
       
   230 		if hash_arg == None:
       
   231 			print "skipping (no hash)"
   345 			print "skipping (no hash)"
   232 			sys.exit(0)
   346 			print "hash is: %s" % realhash
   233 			
   347 		elif realhash == hash_arg:
   234 		realhash = validate_container(name, hash_arg)
       
   235 		if realhash == hash_arg:
       
   236 			print "ok"
   348 			print "ok"
   237 			sys.exit(0)
       
   238 		else:
   349 		else:
   239 			payloadhash = validate_payload(name, hash_arg)
   350 			payloadhash = validate_payload(name, hash_arg)
   240 			if payloadhash == hash_arg:
   351 			if payloadhash == hash_arg:
   241 				print "ok"
   352 				print "ok"
   242 				sys.exit(0)
   353 			else:
   243 			print "corruption detected"
   354 				# If the signature validated, then we assume
   244 			print "    expected: %s" % hash_arg
   355 				# that the expected hash is just a typo, but we
   245 			print "    actual:   %s" % realhash
   356 				# warn just in case.
   246 			print "    payload:  %s" % payloadhash
   357 				if sig_valid:
   247 
   358 					print "invalid hash!"
   248 		try:
   359 				else:
   249 			os.remove(name)
   360 					print "corruption detected"
   250 		except OSError:
   361 
   251 			pass
   362 				print "    expected: %s" % hash_arg
   252 
   363 				print "    actual:   %s" % realhash
       
   364 				print "    payload:  %s" % payloadhash
       
   365 
       
   366 				# An invalid hash shouldn't cause us to remove
       
   367 				# the target file if the signature was valid.
       
   368 				if not sig_valid:
       
   369 					try:
       
   370 						os.remove(name)
       
   371 					except OSError:
       
   372 						pass
       
   373 
       
   374 					continue
       
   375 
       
   376 		sys.exit(0)
   253 	sys.exit(1)
   377 	sys.exit(1)
   254 
   378 
   255 if __name__ == "__main__":
   379 if __name__ == "__main__":
   256 	main()
   380 	main()