17 # fields enclosed by brackets "[]" replaced with your own identifying |
17 # fields enclosed by brackets "[]" replaced with your own identifying |
18 # information: Portions Copyright [yyyy] [name of copyright owner] |
18 # information: Portions Copyright [yyyy] [name of copyright owner] |
19 # |
19 # |
20 # CDDL HEADER END |
20 # CDDL HEADER END |
21 # |
21 # |
22 # Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. |
22 # Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved. |
23 # |
23 # |
24 # |
24 # |
25 # fetch.py - a file download utility |
25 # userland-fetch - a file download utility |
26 # |
26 # |
27 # A simple program similar to wget(1), but handles local file copy, ignores |
27 # A simple program similar to wget(1), but handles local file copy, ignores |
28 # directories, and verifies file hashes. |
28 # directories, and verifies file hashes. |
29 # |
29 # |
30 |
30 |
|
31 import errno |
31 import os |
32 import os |
32 import sys |
33 import sys |
33 import shutil |
34 import shutil |
|
35 import subprocess |
34 from urllib import splittype |
36 from urllib import splittype |
35 from urllib2 import urlopen |
37 from urllib2 import urlopen |
36 import hashlib |
38 import hashlib |
37 |
39 |
def printIOError(e, txt):
    """Print a one-line description of an I/O failure.

    e   -- the exception that was caught.  When its args are a two-element
           (code, message) pair, as for a classic IOError/OSError, the
           detail is rendered as "message (code)"; otherwise str(e) is used.
    txt -- caller-supplied context such as "Can't open file foo".

    The context string is printed in front of the error detail.  It used
    to be accepted and then discarded, which produced messages with no
    hint of which operation had failed.
    """
    try:
        # Unpacking e.args is what unpacking the exception itself did in
        # Python 2, without relying on exceptions being iterable.
        (code, message) = e.args
        detail = str(message) + " (" + str(code) + ")"
    except (AttributeError, TypeError, ValueError):
        # No args attribute, or args is not exactly a two-element pair.
        detail = str(e)
    print(txt + ": " + detail)
|
48 |
|
def validate_signature(path, signature):
    """Given paths to a file and a detached PGP signature, verify that
    the signature is valid for the file.  Current configuration allows for
    unrecognized keys to be downloaded as necessary.

    Returns True only when gpg2 exits with status 0.  Returns False when
    the repository root cannot be determined, when either the hg or the
    gpg2 executable cannot be found, or when verification fails (in which
    case GnuPG's combined stdout/stderr output is printed).  An OSError
    other than ENOENT raised while launching either program is re-raised.
    """

    # Find the root of the repo so that we can point GnuPG at the right
    # configuration and keyring.
    try:
        proc = subprocess.Popen(["hg", "root"], stdout=subprocess.PIPE)
    except OSError as e:
        # Treat a missing hg like a missing gpg2: validation is skipped
        # rather than aborting the whole fetch.
        if e.errno == errno.ENOENT:
            return False
        raise
    # communicate() drains the pipe while waiting for exit.  Calling
    # wait() first on a child with stdout=PIPE can deadlock once the
    # child fills the pipe buffer.
    out, err = proc.communicate()
    if proc.returncode != 0:
        return False
    gpgdir = os.path.join(out.strip(), "tools", ".gnupg")

    # Skip the permissions warning: none of the information here is private,
    # so not having to worry about getting mercurial keeping the directory
    # unreadable is just simplest.
    try:
        # The child gets its own copy of the descriptor, so our handle
        # can be closed as soon as Popen returns.
        with open(os.devnull) as devnull:
            proc = subprocess.Popen(["gpg2", "--verify",
                "--no-permission-warning", "--homedir", gpgdir, signature,
                path], stdin=devnull,
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError as e:
        # If the executable simply couldn't be found, just skip the
        # validation.
        if e.errno == errno.ENOENT:
            return False
        raise

    output = proc.communicate()[0]
    if proc.returncode != 0:
        # Only print GnuPG's output when there was a problem.
        print(output)
        return False
    return True
46 |
84 |
47 def validate(file, hash): |
85 def validate(file, hash): |
48 algorithm, hashvalue = hash.split(':') |
86 """Given a file-like object and a hash string, verify that the hash |
|
87 matches the file contents.""" |
|
88 |
|
89 try: |
|
90 algorithm, hashvalue = hash.split(':') |
|
91 except: |
|
92 algorithm = "sha256" |
49 |
93 |
50 # force migration away from sha1 |
94 # force migration away from sha1 |
51 if algorithm == "sha1": |
95 if algorithm == "sha1": |
52 algorithm = "sha256" |
96 algorithm = "sha256" |
53 try: |
97 try: |
67 break |
111 break |
68 |
112 |
69 return "%s:%s" % (algorithm, m.hexdigest()) |
113 return "%s:%s" % (algorithm, m.hexdigest()) |
70 |
114 |
def validate_container(filename, hash):
    """Given a file path and a hash string, verify that the hash matches the
    file contents.

    Returns whatever validate() returns for the opened file.  If the file
    cannot be opened, the error is printed and False is returned.
    """
    try:
        # Binary mode: the bytes are only hashed, so no newline or text
        # translation should be applied to them.
        container = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False
    try:
        return validate(container, hash)
    finally:
        # The handle used to be left for the garbage collector to close.
        container.close()
78 |
125 |
79 |
126 |
80 def validate_payload(filename, hash): |
127 def validate_payload(filename, hash): |
|
128 """Given a file path and a hash string, verify that the hash matches the |
|
129 payload (uncompressed content) of the file.""" |
|
130 |
81 import re |
131 import re |
82 import gzip |
132 import gzip |
83 import bz2 |
133 import bz2 |
84 |
134 |
85 expr_bz = re.compile('.+\.bz2$', re.IGNORECASE) |
135 expr_bz = re.compile('.+\.bz2$', re.IGNORECASE) |
99 printIOError(e, "Can't open archive " + filename) |
149 printIOError(e, "Can't open archive " + filename) |
100 return False |
150 return False |
101 return validate(file, hash) |
151 return validate(file, hash) |
102 |
152 |
103 |
153 |
104 def download(url, filename = None): |
154 def download(url, filename=None, quiet=False): |
|
155 """Download the content at the given URL to the given filename |
|
156 (defaulting to the basename of the URL if not given). If 'quiet' is |
|
157 True, throw away any error messages. Returns the name of the file to |
|
158 which the content was downloaded.""" |
|
159 |
105 src = None |
160 src = None |
106 |
161 |
107 try: |
162 try: |
108 src = urlopen(url) |
163 src = urlopen(url) |
109 except IOError as e: |
164 except IOError as e: |
110 printIOError(e, "Can't open url " + url) |
165 if not quiet: |
|
166 printIOError(e, "Can't open url " + url) |
111 return None |
167 return None |
112 |
168 |
113 # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action |
169 # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful action |
114 if 3 <= int(src.getcode()/100) <= 5: |
170 if 3 <= int(src.getcode()/100) <= 5: |
115 print "Error code: " + str(src.getcode()) |
171 if not quiet: |
|
172 print "Error code: " + str(src.getcode()) |
116 return None |
173 return None |
117 |
174 |
118 if filename == None: |
175 if filename == None: |
119 filename = src.geturl().split('/')[-1] |
176 filename = src.geturl().split('/')[-1] |
120 |
177 |
121 try: |
178 try: |
122 dst = open(filename, 'wb'); |
179 dst = open(filename, 'wb'); |
123 except IOError as e: |
180 except IOError as e: |
124 printIOError(e, "Can't open file " + filename + " for writing") |
181 if not quiet: |
|
182 printIOError(e, "Can't open file " + filename + " for writing") |
125 src.close() |
183 src.close() |
126 return None |
184 return None |
127 |
185 |
128 while True: |
186 while True: |
129 block = src.read() |
187 block = src.read() |
158 if url != None and url not in urls: |
222 if url != None and url not in urls: |
159 urls.append(url) |
223 urls.append(url) |
160 |
224 |
161 return urls |
225 return urls |
162 |
226 |
|
227 def download_from_paths(search_list, file_arg, url, link_arg, quiet=False): |
|
228 """Attempts to download a file from a number of possible locations. |
|
229 Generates a list of paths where the file ends up on the local |
|
230 filesystem. This is a generator because while a download might be |
|
231 successful, the signature or hash may not validate, and the caller may |
|
232 want to try again from the next location. The 'link_arg' argument is a |
|
233 boolean which, when True, specifies that if the source is not a remote |
|
234 URL and not already found where it should be, to make a symlink to the |
|
235 source rather than copying it. |
|
236 """ |
|
237 for url in download_paths(search_list, file_arg, url): |
|
238 if not quiet: |
|
239 print "Source %s..." % url, |
|
240 |
|
241 scheme, path = splittype(url) |
|
242 name = file_arg |
|
243 |
|
244 if scheme in [ None, 'file' ]: |
|
245 if os.path.exists(path) == False: |
|
246 if not quiet: |
|
247 print "not found, skipping file copy" |
|
248 continue |
|
249 elif name and name != path: |
|
250 if link_arg == False: |
|
251 if not quiet: |
|
252 print "\n copying..." |
|
253 shutil.copy2(path, name) |
|
254 else: |
|
255 if not quiet: |
|
256 print "\n linking..." |
|
257 os.symlink(path, name) |
|
258 elif scheme in [ 'http', 'https', 'ftp' ]: |
|
259 if not quiet: |
|
260 print "\n downloading...", |
|
261 name = download(url, file_arg, quiet) |
|
262 if name == None: |
|
263 if not quiet: |
|
264 print "failed" |
|
265 continue |
|
266 |
|
267 yield name |
|
268 |
def usage():
    """Print the command-line synopsis and exit with status 1."""
    # Only the last path component of the invoked name is shown.
    program = sys.argv[0].split('/')[-1]
    synopsis = (
        "Usage: %s [-f|--file (file)] [-l|--link] [-h|--hash (hash)] "
        "[-s|--search (search-dir)] [-S|--sigurl (signature-url)] --url (url)"
    )
    print(synopsis % program)
    sys.exit(1)
166 |
274 |
167 def main(): |
275 def main(): |
168 import getopt |
276 import getopt |
169 |
277 |
190 link_arg = True |
299 link_arg = True |
191 elif opt in [ "-h", "--hash" ]: |
300 elif opt in [ "-h", "--hash" ]: |
192 hash_arg = arg |
301 hash_arg = arg |
193 elif opt in [ "-s", "--search" ]: |
302 elif opt in [ "-s", "--search" ]: |
194 search_list.append(arg) |
303 search_list.append(arg) |
|
304 elif opt in [ "-S", "--sigurl" ]: |
|
305 sig_arg = arg |
195 elif opt in [ "-u", "--url" ]: |
306 elif opt in [ "-u", "--url" ]: |
196 url_arg = arg |
307 url_arg = arg |
197 else: |
308 else: |
198 assert False, "unknown option" |
309 assert False, "unknown option" |
199 |
310 |
200 if url_arg == None: |
311 if url_arg == None: |
201 usage() |
312 usage() |
202 |
313 |
203 for url in download_paths(search_list, file_arg, url_arg): |
314 for name in download_from_paths(search_list, file_arg, url_arg, link_arg): |
204 print "Source %s..." % url, |
315 print "\n validating signature...", |
205 |
316 |
206 scheme, path = splittype(url) |
317 sig_valid = False |
207 name = file_arg |
318 if not sig_arg: |
208 |
319 print "skipping (no signature URL)" |
209 if scheme in [ None, 'file' ]: |
320 else: |
210 if os.path.exists(path) == False: |
321 # Put the signature file in the same directory as the |
211 print "not found, skipping file copy" |
322 # file we're downloading. |
212 continue |
323 sig_file = os.path.join( |
213 elif name != path: |
324 os.path.dirname(file_arg), |
214 if link_arg == False: |
325 os.path.basename(sig_arg)) |
215 print "\n copying..." |
326 # Validate with the first signature we find. |
216 shutil.copy2(path, name) |
327 for sig_file in download_from_paths(search_list, sig_file, |
|
328 sig_arg, link_arg, True): |
|
329 if sig_file: |
|
330 if validate_signature(name, sig_file): |
|
331 print "ok" |
|
332 sig_valid = True |
|
333 else: |
|
334 print "failed" |
|
335 break |
217 else: |
336 else: |
218 print "\n linking..." |
337 continue |
219 os.symlink(path, name) |
|
220 else: |
338 else: |
221 pass |
339 print "failed (couldn't fetch signature)" |
222 elif scheme in [ 'http', 'https', 'ftp' ]: |
340 |
223 print "\n downloading...", |
341 print " validating hash...", |
224 name = download(url, file_arg) |
342 realhash = validate_container(name, hash_arg) |
225 if name == None: |
343 |
226 print "failed" |
344 if not hash_arg: |
227 continue |
|
228 |
|
229 print "\n validating...", |
|
230 if hash_arg == None: |
|
231 print "skipping (no hash)" |
345 print "skipping (no hash)" |
232 sys.exit(0) |
346 print "hash is: %s" % realhash |
233 |
347 elif realhash == hash_arg: |
234 realhash = validate_container(name, hash_arg) |
|
235 if realhash == hash_arg: |
|
236 print "ok" |
348 print "ok" |
237 sys.exit(0) |
|
238 else: |
349 else: |
239 payloadhash = validate_payload(name, hash_arg) |
350 payloadhash = validate_payload(name, hash_arg) |
240 if payloadhash == hash_arg: |
351 if payloadhash == hash_arg: |
241 print "ok" |
352 print "ok" |
242 sys.exit(0) |
353 else: |
243 print "corruption detected" |
354 # If the signature validated, then we assume |
244 print " expected: %s" % hash_arg |
355 # that the expected hash is just a typo, but we |
245 print " actual: %s" % realhash |
356 # warn just in case. |
246 print " payload: %s" % payloadhash |
357 if sig_valid: |
247 |
358 print "invalid hash!" |
248 try: |
359 else: |
249 os.remove(name) |
360 print "corruption detected" |
250 except OSError: |
361 |
251 pass |
362 print " expected: %s" % hash_arg |
252 |
363 print " actual: %s" % realhash |
|
364 print " payload: %s" % payloadhash |
|
365 |
|
366 # An invalid hash shouldn't cause us to remove |
|
367 # the target file if the signature was valid. |
|
368 if not sig_valid: |
|
369 try: |
|
370 os.remove(name) |
|
371 except OSError: |
|
372 pass |
|
373 |
|
374 continue |
|
375 |
|
376 sys.exit(0) |
253 sys.exit(1) |
377 sys.exit(1) |
254 |
378 |
# Run the fetch logic only when executed as a script, not when imported.
if __name__ == "__main__":
    main()