author  Mike Sullivan <Mike.Sullivan@Oracle.COM> 
Fri, 07 Apr 2017 17:56:36 0700  
20440888 Userland python tools should migrate to 2.7
John Beck <John.Beck@Oracle.COM>
1 
#!/usr/bin/python2.7 
2 
# 
3 
# CDDL HEADER START 
4 
# 
5 
# The contents of this file are subject to the terms of the 
6 
# Common Development and Distribution License (the "License"). 
7 
# You may not use this file except in compliance with the License. 
8 
# 
9 
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 
10 
# or http://www.opensolaris.org/os/licensing. 
11 
# See the License for the specific language governing permissions 
12 
# and limitations under the License. 
13 
# 
14 
# When distributing Covered Code, include this CDDL HEADER in each 
15 
# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
16 
# If applicable, add the following below this CDDL HEADER, with the 
17 
# fields enclosed by brackets "[]" replaced with your own identifying 
18 
# information: Portions Copyright [yyyy] [name of copyright owner] 
19 
# 
20 
# CDDL HEADER END 
21 
# 
22 

23 
# 
24 
# Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved. 
18
25 
# 
26 
# 
3533
0b8107a40da7
20183619 userland should support PGP signatures
Danek Duvall <danek.duvall@oracle.com>
parents:
832
diff
changeset

27 
# userland-fetch - a file download utility
18
28 
# 
29 
# A simple program similar to wget(1), but handles local file copy, ignores
30 
# directories, and verifies file hashes. 
31 
# 
32 

33 
import errno
import hashlib
import json
import os
import re
import shutil
import subprocess
import sys
from urllib import splittype, splithost
from urllib2 import urlopen, HTTPError
18
43 

136
44 
def printIOError(e, txt):
    """Decode and print an IOError-type exception.

    Prints a header line "I/O Error: <txt>: " and then the detail.  When
    the exception's arguments are exactly a (code, message) pair, the
    detail is printed as "<message> (<code>)"; otherwise the plain string
    form of the exception is printed.

    e   -- the exception to report
    txt -- short description of the operation that failed
    """
    print("I/O Error: " + txt + ": ")
    try:
        # Only a two-element args tuple unpacks cleanly; anything else
        # (no args, one arg, three or more) falls back to str(e).
        code, message = e.args
    except (AttributeError, TypeError, ValueError):
        print(str(e))
    else:
        print(str(message) + " (" + str(code) + ")")
3533
52 

53 
def validate_signature(path, signature):
    """Verify a detached PGP signature against a file.

    path      -- path of the file whose contents were signed
    signature -- path of the detached signature file

    Returns True when gpg2 exits successfully, False when verification
    fails or cannot be attempted (the repository root cannot be
    determined, or the hg/gpg2 executable is not installed).  Any other
    OSError raised while starting a subprocess is propagated.

    Current configuration allows for unrecognized keys to be downloaded
    as necessary.
    """

    # Find the root of the repo so that we can point GnuPG at the right
    # configuration and keyring.
    try:
        proc = subprocess.Popen(["hg", "root"], stdout=subprocess.PIPE)
    except OSError as e:
        # Treat a missing hg the same way as a missing gpg2 below.
        if e.errno == errno.ENOENT:
            return False
        raise
    # communicate() drains the pipe while waiting; calling wait() first
    # can deadlock once the child fills the pipe buffer.
    out, _ = proc.communicate()
    if proc.returncode != 0:
        return False
    gpgdir = os.path.join(out.strip(), "tools", ".gnupg")

    # Skip the permissions warning: none of the information here is private,
    # so not having to worry about getting mercurial keeping the directory
    # unreadable is just simplest.
    with open(os.devnull) as devnull:
        try:
            proc = subprocess.Popen(
                ["gpg2", "--verify", "--no-permission-warning",
                 "--homedir", gpgdir, signature, path],
                stdin=devnull, stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT)
        except OSError as e:
            # If the executable simply couldn't be found, just skip the
            # validation.
            if e.errno == errno.ENOENT:
                return False
            raise
        output, _ = proc.communicate()

    if proc.returncode != 0:
        # Only print GnuPG's output when there was a problem.
        print(output)
        return False
    return True
5244
88 

135
89 
def validate(file, hash):
    """Compute the digest of a file-like object's contents.

    file -- object with a read(size) method returning the raw data
    hash -- expected hash in the form "algorithm:hexdigest"; only the
            algorithm part is used here.  When it is None or not of
            that form, sha256 is assumed.

    Returns the computed hash as "algorithm:hexdigest" so the caller can
    compare it with the expected value, or False when the algorithm is
    not supported by hashlib or the data cannot be read.
    """

    try:
        algorithm, _ = hash.split(':')
    except (AttributeError, ValueError):
        # No hash given, or not exactly "algorithm:value".
        algorithm = "sha256"

    # force migration away from sha1
    if algorithm == "sha1":
        algorithm = "sha256"

    try:
        digest = hashlib.new(algorithm)
    except ValueError:
        return False

    # Read in bounded chunks so large archives are never held in memory
    # in their entirety.
    while True:
        try:
            block = file.read(65536)
        except IOError as err:
            # A digest over partially-read data would be misleading, so
            # report the failure instead of returning one.
            printIOError(err, "Can't read file contents")
            return False
        if not block:
            break
        digest.update(block)

    return "%s:%s" % (algorithm, digest.hexdigest())
18
118 

135
119 
def validate_container(filename, hash):
    """Compute the hash of a file's raw (on-disk) contents.

    filename -- path of the file to hash
    hash     -- expected hash string, passed through to validate() to
                select the algorithm

    Returns whatever validate() returns for the opened file, or False
    (after printing the error) when the file cannot be opened.
    """

    try:
        # Binary mode: the digest must cover the exact bytes on disk.
        container = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False

    # Make sure the handle is released once hashing is done.
    with container:
        return validate(container, hash)
129 

130 

131 
def validate_payload(filename, hash):
    """Compute the hash of an archive's payload (uncompressed content).

    filename -- path of a .bz2, .gz or .tgz file (suffix matched
                case-insensitively)
    hash     -- expected hash string, passed through to validate() to
                select the algorithm

    Returns whatever validate() returns for the decompressed stream, or
    False when the suffix is not a recognized compression format or the
    archive cannot be opened.
    """

    import gzip
    import bz2

    # Pick the decompressor from the file name suffix.
    lowered = filename.lower()
    if lowered.endswith('.bz2'):
        opener = bz2.BZ2File
    elif lowered.endswith(('.gz', '.tgz')):
        opener = gzip.GzipFile
    else:
        return False

    try:
        archive = opener(filename, 'r')
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False

    # Always release the archive handle, even if hashing fails.
    try:
        return validate(archive, hash)
    finally:
        archive.close()
155 

5244
156 
def download(url, timeout, filename=None, quiet=False):
    """Download the content at the given URL to the given filename.

    url      -- URL to fetch
    timeout  -- timeout in seconds handed to urlopen()
    filename -- destination path; defaults to the basename of the
                (possibly redirected) URL when not given
    quiet    -- when True, throw away any error messages

    Returns the name of the file to which the content was downloaded, or
    None when the URL cannot be opened, the server reports a 3xx/4xx/5xx
    status, or the destination cannot be opened for writing.
    """

    try:
        src = urlopen(url=url, timeout=timeout)
    except IOError as e:
        if not quiet:
            printIOError(e, "Can't open url " + url)
        return None

    try:
        # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action.
        # Responses without a status code (getcode() returns None) are
        # not treated as failures.
        code = src.getcode()
        if code is not None and 300 <= code < 600:
            if not quiet:
                print("Error code: " + str(code))
            return None

        if filename is None:
            # Basename of the final URL.
            filename = src.geturl().split('/')[-1]

        try:
            dst = open(filename, 'wb')
        except IOError as e:
            if not quiet:
                printIOError(e, "Can't open file " + filename +
                             " for writing")
            return None

        # Copy in bounded chunks; 'with' closes the destination even if
        # a read fails part-way through.
        with dst:
            while True:
                block = src.read(65536)
                if not block:
                    break
                dst.write(block)
    finally:
        src.close()

    # return the name of the file that we downloaded the data to.
    return filename
199 

5862
200 
def pypi_url(url, filename):
    """Given a pypi: URL, return the real URL for that component/version.

    The pypi scheme has a host (with an empty host defaulting to
    pypi.python.org), and a path that should be of the form
    "component==version".  Other specs could be supported, but == is the
    only thing that makes sense in this context.

    The filename argument is the name of the expected file to download, so
    that when pypi gives us multiple archives to choose from, we can pick
    the right one.

    Returns the archive URL whose last path component equals the basename
    of 'filename', or None (after printing a diagnostic) when the URL is
    malformed, the PyPI JSON cannot be fetched, the version is unknown, or
    no listed archive matches.
    """

    # Split "pypi://host/component==version" into its host and path.
    host, path = splithost(splittype(url)[1])

    # Only the '==' specifier is supported; really that's the only thing
    # that makes sense in this context.
    try:
        name, version = re.match("/(.*)==(.*)$", path).groups()
    except AttributeError:
        # re.match() returned None: the path is not "/component==version".
        print("PyPI URLs must be of the form 'pypi:///component==version'")
        return None

    if not host:
        jsurl = "http://pypi.python.org/pypi/%s/json" % name
    else:
        jsurl = "http://%s/pypi/%s/json" % (host, name)

    try:
        # Don't wait very long for the connection
        f = urlopen(jsurl, None, 2)
    except HTTPError as e:
        if e.getcode() == 404:
            print("Unknown component '%s'" % name)
        else:
            printIOError(e, "Can't open PyPI JSON url %s" % jsurl)
        return None
    except IOError as e:
        printIOError(e, "Can't open PyPI JSON url %s" % jsurl)
        return None

    # Release the connection as soon as the document has been parsed.
    try:
        js = json.load(f)
    finally:
        f.close()

    try:
        verblock = js["releases"][version]
    except KeyError:
        print("Unknown version '%s'" % version)
        return None

    urls = [d["url"] for d in verblock]
    wanted = "/%s" % os.path.basename(filename)
    for archiveurl in urls:
        if archiveurl.endswith(wanted):
            return archiveurl

    if urls:
        print("None of the following URLs delivers '%s':" % filename)
        print(" " + "\n ".join(urls))
    else:
        print("Couldn't find any suitable URLs")
    return None
259 

260 
def download_paths(search, filename, url):
    """Returns a list of URLs where the file 'filename' might be found,
    using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.

    search   -- list of base locations (directories or URLs); it is not
                modified
    filename -- name of the wanted file, or None
    url      -- fallback URL, or None

    The result starts with 'filename' itself, followed by the basename of
    'filename' under each base in 'search' and then under each
    whitespace-separated entry of $DOWNLOAD_SEARCH_PATH, and ends with
    'url' unless it is already present.

    If 'filename' is None, then the list will simply contain 'url'.
    """

    urls = []

    if filename is not None:
        # Work on a copy: the caller reuses its search list across calls,
        # so extending it in place would append the environment entries
        # again on every call.
        bases = list(search)
        extra = os.getenv('DOWNLOAD_SEARCH_PATH')
        if extra:
            bases += extra.split()

        basename = os.path.basename(filename)
        urls = [base + '/' + basename for base in bases]

        # filename should always be first
        if filename in urls:
            urls.remove(filename)
        urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        urls.append(url)

    return urls
288 

289 
def download_from_paths(search_list, file_arg, url, timeout_arg, link_arg, quiet=False): 
290 
"""Attempts to download a file from a number of possible locations. 
291 
Generates a list of paths where the file ends up on the local 
292 
filesystem. This is a generator because while a download might be 
293 
successful, the signature or hash may not validate, and the caller may 
294 
want to try again from the next location. The 'link_arg' argument is a 
295 
boolean which, when True, specifies that if the source is not a remote 
296 
URL and not already found where it should be, to make a symlink to the 
297 
source rather than copying it. 
298 
""" 
299 
for url in download_paths(search_list, file_arg, url): 
300 
if not quiet: 
301 
print "Source %s..." % url, 
302 

303 
scheme, path = splittype(url) 
304 
name = file_arg 
305 

306 
if scheme in [ None, 'file' ]: 
307 
if os.path.exists(path) == False: 
308 
if not quiet: 
309 
print "not found, skipping file copy" 
310 
continue 
311 
elif name and name != path: 
312 
if link_arg == False: 
313 
if not quiet: 
314 
print "\n copying..." 
315 
shutil.copy2(path, name) 
316 
else: 
317 
if not quiet: 
318 
print "\n linking..." 
319 
os.symlink(path, name) 
320 
elif scheme in [ 'http', 'https', 'ftp' ]: 
321 
if not quiet: 
322 
print "\n downloading...", 
323 
name = download(url, timeout_arg, file_arg, quiet) 
324 
if name == None: 
325 
if not quiet: 
326 
print "failed" 
327 
continue 
328 
elif scheme == "pypi": 
329 
nurl = pypi_url(url, file_arg) 
330 
if nurl: 
331 
if not quiet: 
332 
print "\n translated %s to %s..." % ( 
333 
url, nurl), 
334 
print "\n downloading...", 
335 
else: 
336 
if not quiet: 
337 
print "\n unable to contact PyPI", 
338 
continue 
339 
name = download(nurl, timeout_arg, file_arg, quiet) 
340 
if name is None: 
341 
if not quiet: 
342 
print "failed" 
343 
continue 
344 
else: 
345 
print " unknown scheme '%s'" % scheme 
346 
return 
347 

348 
yield name 
349 

350 
def usage():
    """Print the command line synopsis and exit with status 1.

    The option names mirror the getopt specification used by main().
    """
    # Show only the program's basename, however it was invoked.
    program = os.path.basename(sys.argv[0])
    print("Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] "
          "[-s|--search (search-dir)] [-S|--sigurl (signature-url)] "
          "[-t|--timeout (timeout)] -u|--url (url)" % program)
    sys.exit(1)
356 

357 
def main(): 
358 
import getopt 
359 

360 
# FLUSH STDOUT 
361 
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) 
362 

363 
file_arg = None 
364 
link_arg = False 
365 
hash_arg = None 
366 
url_arg = None 
367 
sig_arg = None 
368 
timeout_arg = 300 
369 
search_list = list() 
370 

371 
try: 
372 
opts, args = getopt.getopt(sys.argv[1:], "f:h:ls:S:t:u:", 
373 
["file=", "link", "hash=", "search=", "sigurl=", 
374 
"timeout=", "url="]) 
375 
except getopt.GetoptError, err: 
376 
print str(err) 
377 
usage() 
378 

379 
for opt, arg in opts: 
380 
if opt in [ "f", "file" ]: 
381 
file_arg = arg 
382 
elif opt in [ "l", "link" ]: 
383 
link_arg = True 
384 
elif opt in [ "h", "hash" ]: 
42
385 
hash_arg = arg 
386 
elif opt in [ "s", "search" ]: 
387 
search_list.append(arg) 
388 
elif opt in [ "S", "sigurl" ]: 
389 
sig_arg = arg 
390 
elif opt in [ "t", "timeout" ]: 
391 
try: 
392 
timeout_arg = int(arg) 
393 
except ValueError: 
394 
print "Invalid argument for %s, should be a " \ 
395 
"number, but is %s" % (opt, arg) 
396 
sys.exit(1) 
397 
if timeout_arg < 0: 
398 
print "Invalid argument for %s, should be a " \ 
399 
"positive number, but is %s" % (opt, arg) 
400 
sys.exit(1) 
401 
elif opt in [ "u", "url" ]: 
402 
url_arg = arg 
403 
else: 
404 
assert False, "unknown option" 
405 

406 
for name in download_from_paths(search_list, file_arg, url_arg, 
407 
timeout_arg, link_arg): 
408 
print "\n validating signature...", 
409 

410 
sig_valid = False 
411 
if not sig_arg: 
412 
print "skipping (no signature URL)" 
413 
else: 
414 
# Put the signature file in the same directory as the 
415 
# file we're downloading. 
416 
sig_file = os.path.join( 
417 
os.path.dirname(file_arg), 
418 
os.path.basename(sig_arg)) 
419 
# Validate with the first signature we find. 
420 
for sig_file in download_from_paths(search_list, sig_file, 
421 
sig_arg, timeout_arg, link_arg, True): 
422 
if sig_file: 
423 
if validate_signature(name, sig_file): 
424 
print "ok" 
425 
sig_valid = True 
426 
else: 
427 
print "failed" 
428 
break 
429 
else: 
430 
continue 
431 
else: 
432 
print "failed (couldn't fetch signature)" 
433 

434 
print " validating hash...", 
435 
realhash = validate_container(name, hash_arg) 
436 

437 
if not hash_arg: 
438 
print "skipping (no hash)" 
439 
print "hash is: %s" % realhash 
440 
elif realhash == hash_arg: 
441 
print "ok" 
442 
else: 
443 
payloadhash = validate_payload(name, hash_arg) 
444 
if payloadhash == hash_arg: 
445 
print "ok" 
446 
else: 
447 
# If the signature validated, then we assume 
448 
# that the expected hash is just a typo, but we 
449 
# warn just in case. 
450 
if sig_valid: 
451 
print "invalid hash! Did you forget " \ 
452 
"to update it?" 
453 
else: 
454 
print "corruption detected" 
455 

456 
print " expected: %s" % hash_arg 
457 
print " actual: %s" % realhash 
458 
print " payload: %s" % payloadhash 
459 

460 
# If the hash is invalid, but the signature 
461 
# validation succeeded, rename the archive (so 
462 
# the user doesn't have to redownload it) and 
463 
# fail. Otherwise, try to remove the file and 
464 
# try again. 
465 
if sig_valid: 
466 
newname = name + ".invalidhash" 
467 
try: 
468 
os.rename(name, newname) 
469 
except OSError: 
470 
pass 
471 
else: 
472 
print "archive saved as %s; " \ 
473 
"if it isn't corrupt, " \ 
474 
"rename to %s" % (newname, 
475 
name) 
476 
sys.exit(1) 
477 
else: 
478 
try: 
479 
os.remove(name) 
480 
except OSError: 
481 
pass 
482 

483 
continue 
484 

485 
sys.exit(0) 
486 
sys.exit(1) 
487 

488 
# Run the fetcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()