tools/userland-fetch
changeset 5862 ce31ce66da02
parent 5682 94c0ca64c022
child 5952 0ff1d1b8de45
--- a/tools/userland-fetch	Thu Apr 21 12:46:25 2016 -0400
+++ b/tools/userland-fetch	Wed Apr 27 14:28:37 2016 -0700
@@ -32,11 +32,13 @@
 
 import errno
 import os
+import re
 import sys
 import shutil
+import json
 import subprocess
-from urllib import splittype
-from urllib2 import urlopen
+from urllib import splittype, splithost
+from urllib2 import urlopen, HTTPError
 import hashlib
 
 def printIOError(e, txt):
@@ -130,7 +132,6 @@
 	"""Given a file path and a hash string, verify that the hash matches the
 	payload (uncompressed content) of the file."""
 
-	import re
 	import gzip
 	import bz2
 
@@ -152,7 +153,6 @@
 		return False
 	return validate(file, hash)
 
-
 def download(url, timeout, filename=None, quiet=False):
 	"""Download the content at the given URL to the given filename
 	(defaulting to the basename of the URL if not given.  If 'quiet' is
@@ -197,6 +197,65 @@
 	# return the name of the file that we downloaded the data to.
 	return filename
 
+def pypi_url(url, filename):
+	"""Given a pypi: URL, return the real URL for that component/version.
+
+	The pypi scheme has a host (with an empty host defaulting to
+	pypi.python.org), and a path that should be of the form
+	"component==version".  Anything fancier than == would require
+	pkg_resources, and == is the only spec that makes sense here.
+
+	The filename argument is the name of the expected file to download, so
+	that when pypi gives us multiple archives to choose from, we can pick
+	the right one.  Returns None, after printing a diagnostic, when the
+	URL is malformed, the lookup fails, or no archive matches filename.
+	"""
+
+	host, path = splithost(splittype(url)[1])
+
+	match = re.match("/(.*)==(.*)$", path)
+	if match is None:
+		print "PyPI URLs must be of the form 'pypi:///component==version'"
+		return None
+	name, version = match.groups()
+
+	jsurl = "http://%s/pypi/%s/json" % (host or "pypi.python.org", name)
+	try:
+		f = urlopen(jsurl)
+	except IOError as e:
+		# HTTPError is an IOError too; a 404 means no such component.
+		if isinstance(e, HTTPError) and e.getcode() == 404:
+			print "Unknown component '%s'" % name
+		else:
+			printIOError(e, "Can't open PyPI JSON url %s" % jsurl)
+		return None
+
+	try:
+		js = json.load(f)
+	except ValueError as e:
+		print "Can't parse PyPI JSON from %s: %s" % (jsurl, e)
+		return None
+	finally:
+		f.close()
+
+	try:
+		verblock = js["releases"][version]
+	except KeyError:
+		print "Unknown version '%s'" % version
+		return None
+
+	urls = [ d["url"] for d in verblock ]
+	for archiveurl in urls:
+		if archiveurl.endswith("/%s" % filename):
+			return archiveurl
+
+	if urls:
+		print "None of the following URLs delivers '%s':" % filename
+		print "  " + "\n  ".join(urls)
+	else:
+		print "Couldn't find any suitable URLs"
+	return None
+
 def download_paths(search, filename, url):
 	"""Returns a list of URLs where the file 'filename' might be found,
 	using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.
@@ -222,7 +281,11 @@
 
 	# command line url is a fallback, so it's last
 	if url != None and url not in urls:
-		urls.append(url)
+		scheme, path = splittype(url)
+		if scheme == "pypi":
+			url = pypi_url(url, filename)
+		if url != None and url not in urls:
+			urls.append(url)
 
 	return urls