1018 actions with payloads should include all sizes and hashes
authorjohansen <johansen@sun.com>
Fri, 15 Aug 2008 14:03:49 -0700
changeset 461 37cf3ac75e37
parent 460 784509473aca
child 462 910600c14093
1018 actions with payloads should include all sizes and hashes 1461 depot should ignore requests for absent files 2227 File action should verify its checksums 2261 Support mirroring of package content 2285 Versions lists unavailable operations 2330 Imageconfig incorrectly parses boolean config options
src/client.py
src/depot.py
src/modules/client/filelist.py
src/modules/client/image.py
src/modules/client/imageconfig.py
src/modules/client/retrieve.py
src/modules/depotcontroller.py
src/modules/misc.py
src/modules/pkggzip.py
src/modules/server/config.py
src/modules/server/repository.py
src/modules/server/transaction.py
src/tests/baseline.txt
src/tests/cli/t_commandline.py
src/tests/cli/t_depotcontroller.py
--- a/src/client.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/client.py	Fri Aug 15 14:03:49 2008 -0700
@@ -102,7 +102,8 @@
             [-k ssl_key] [-c ssl_cert] -a <prefix>=<url> dir
 
         pkg set-authority [-P] [-k ssl_key] [-c ssl_cert]
-            [-O origin_url] authority
+            [-O origin_url] [-m mirror to add | --add-mirror=mirror to add]
+            [-M mirror to remove | --remove-mirror=mirror to remove] authority
         pkg unset-authority authority ...
         pkg authority [-HP] [authname]
         pkg rebuild-index
@@ -1359,14 +1360,19 @@
 
 def authority_set(img, args):
         """pkg set-authority [-P] [-k ssl_key] [-c ssl_cert]
-            [-O origin_url] authority"""
+            [-O origin_url] [-m mirror to add] [-M mirror to remove] 
+            authority"""
 
         preferred = False
         ssl_key = None
         ssl_cert = None
         origin_url = None
+        add_mirror = None
+        remove_mirror = None
 
-        opts, pargs = getopt.getopt(args, "Pk:c:O:")
+        opts, pargs = getopt.getopt(args, "Pk:c:O:M:m:",
+            ["add-mirror=", "remove-mirror="])
+
         for opt, arg in opts:
                 if opt == "-P":
                         preferred = True
@@ -1376,6 +1382,10 @@
                         ssl_cert = arg
                 if opt == "-O":
                         origin_url = arg
+                if opt == "-m" or opt == "--add-mirror":
+                        add_mirror = arg
+                if opt == "-M" or opt == "--remove-mirror":
+                        remove_mirror = arg
 
         if len(pargs) != 1:
                 usage(
@@ -1400,8 +1410,8 @@
 
 
         if not img.has_authority(auth) and origin_url == None:
-                error(_("set-authority: must define origin URL for new " \
-                    "authority"))
+                error(_("set-authority: authority does not exist. Use " \
+                    "-O to define origin URL for new authority"))
                 return 1
 
         elif not img.has_authority(auth) and not misc.valid_auth_prefix(auth):
@@ -1422,6 +1432,33 @@
         if preferred:
                 img.set_preferred_authority(auth)
 
+
+        if add_mirror:
+
+                if not misc.valid_auth_url(add_mirror):
+                        error(_("set-authority: added mirror's URL is invalid"))
+                        return 1
+
+                if img.has_mirror(auth, add_mirror):
+                        error(_("set-authority: mirror already exists"))
+                        return 1
+
+                img.add_mirror(auth, add_mirror)
+
+        if remove_mirror:
+
+                if not misc.valid_auth_url(remove_mirror):
+                        error(_("set-authority: removed mirror has bad URL"))
+                        return 1
+
+                if not img.has_mirror(auth, remove_mirror):
+                        error(_("set-authority: mirror does not exist"))
+                        return 1
+
+
+                img.del_mirror(auth, remove_mirror)
+
+
         return 0
 
 def authority_unset(img, args):
@@ -1474,7 +1511,8 @@
 
                 for a in auths:
                         # summary list
-                        pfx, url, ssl_key, ssl_cert, dt = img.split_authority(a)
+                        pfx, url, ssl_key, ssl_cert, dt, mir = \
+                            img.split_authority(a)
 
                         if not preferred_only and pfx == preferred_authority:
                                 pfx += " (preferred)"
@@ -1490,7 +1528,7 @@
 
                         # detailed print
                         auth = img.get_authority(a)
-                        pfx, url, ssl_key, ssl_cert, dt = \
+                        pfx, url, ssl_key, ssl_cert, dt, mir = \
                             img.split_authority(auth)
 
                         if dt:
@@ -1502,6 +1540,7 @@
                         msg("        SSL Key:", ssl_key)
                         msg("       SSL Cert:", ssl_cert)
                         msg("Catalog Updated:", dt)
+                        msg("        Mirrors:", mir)
 
         return 0
 
@@ -1739,7 +1778,12 @@
                 # further broken pipe (EPIPE) errors.
                 sys.exit(1)
         except misc.TransferTimedOutException:
-                msg(_("Maximum number of timeouts exceeded during download."))
+                error(_("Maximum number of timeouts exceeded during download."))
+                sys.exit(1)
+        except misc.InvalidContentException, e:
+                error(_("One or more hosts providing content for this install" +
+                    " has provided a file with invalid content."))
+                error(_(str(e)))
                 sys.exit(1)
         except:
                 traceback.print_exc()
--- a/src/depot.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/depot.py	Fri Aug 15 14:03:49 2008 -0700
@@ -63,6 +63,8 @@
 REBUILD_DEFAULT = False
 # Whether the indexes should be rebuilt
 REINDEX_DEFAULT = False
+# Not in mirror mode by default
+MIRROR_DEFAULT = False
 
 import getopt
 import os
@@ -90,12 +92,14 @@
 
 def usage():
         print """\
-Usage: /usr/lib/pkg.depotd [--readonly] [--rebuild] [--proxy-base url]
+Usage: /usr/lib/pkg.depotd [--readonly] [--rebuild] [--mirror] [--proxy-base url]
            [-d repo_dir] [-p port] [-s threads] [-t socket_timeout] 
 
         --readonly      Read-only operation; modifying operations disallowed
         --rebuild       Re-build the catalog from pkgs in depot
-                        Cannot be used with --readonly
+                        Cannot be used with --readonly or --mirror
+        --mirror        Content mirror mode. Publishing and metadata operations
+                        disabled
         --proxy-base    The url to use as the base for generating internal
                         redirects and content.
 """
@@ -116,6 +120,7 @@
         rebuild = REBUILD_DEFAULT
         reindex = REINDEX_DEFAULT
         proxy_base = None
+        mirror = MIRROR_DEFAULT
 
         if "PKG_REPO" in os.environ:
                 repo_path = os.environ["PKG_REPO"]
@@ -125,7 +130,7 @@
         try:
                 parsed = set()
                 opts, pargs = getopt.getopt(sys.argv[1:], "d:np:s:t:",
-                    ["readonly", "rebuild", "proxy-base=", "refresh-index"])
+                    ["readonly", "rebuild", "mirror", "proxy-base=", "refresh-index"])
                 opt = None
                 for opt, arg in opts:
                         if opt in parsed:
@@ -180,6 +185,8 @@
                                     path, params, query, fragment)
                                     ).lstrip("//")
 
+                        elif opt == "--mirror":
+                                mirror = True
         except getopt.GetoptError, e:
                 print "pkg.depotd: %s" % e.msg
                 usage()
@@ -194,11 +201,11 @@
         if rebuild and reindex:
                 print "--refresh-index cannot be used with --rebuild"
                 usage()
-        if rebuild and readonly:
-                print "--readonly cannot be used with --rebuild"
+        if rebuild and (readonly or mirror):
+                print "--readonly and --mirror cannot be used with --rebuild"
                 usage()
-        if reindex and readonly:
-                print "--readonly cannot be used with --refresh-index"
+        if reindex and (readonly or mirror):
+                print "--readonly and --mirror cannot be used with --refresh-index"
                 usage()
 
         # If the program is going to reindex, the port is irrelevant since
@@ -223,6 +230,9 @@
         if readonly:
                 scfg.set_read_only()
 
+        if mirror:
+                scfg.set_mirror()
+
         try:
                 scfg.init_dirs()
         except EnvironmentError, e:
--- a/src/modules/client/filelist.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/client/filelist.py	Fri Aug 15 14:03:49 2008 -0700
@@ -28,7 +28,11 @@
 import os
 import urllib
 import urllib2
+import httplib
 import socket
+import time
+import sha
+from tarfile import ReadError
 
 import pkg.pkgtarfile as ptf
 import pkg.portable as portable
@@ -36,6 +40,8 @@
 from pkg.misc import versioned_urlopen
 from pkg.misc import hash_file_name
 from pkg.misc import TransferTimedOutException
+from pkg.misc import TransferContentException
+from pkg.misc import InvalidContentException
 from pkg.misc import MAX_TIMEOUT_COUNT
 
 class FileList(object):
@@ -84,6 +90,18 @@
                 self.effective_bytes = 0
                 self.effective_nfiles = 0
 
+                if fmri:
+                        auth, pkg_name, version = self.fmri.tuple()
+
+                        self.authority = pkg.fmri.strip_auth_pfx(auth)
+                        self.ssl_tuple = self.image.get_ssl_credentials(auth)
+                else:
+                        self.authority = None
+                        self.ssl_tuple = None
+
+                self.ds = None
+                self.url = None
+
         def add_action(self, action):
                 """Add the specified action to the filelist.  The action
                 must name a file that can be retrieved from the repository.
@@ -104,6 +122,8 @@
                         action.data = self._make_opener(cache_path)
                         bytes = int(action.attrs.get("pkg.size", "0"))
 
+                        self._verify_content(action, cache_path)
+
                         self.progtrack.download_adjust_goal(0, -1, -bytes)
 
                         return
@@ -149,6 +169,13 @@
                 self.effective_nfiles += 1
                 self.effective_bytes += int(action.attrs.get("pkg.size", "0"))
 
+        def _clear_mirror(self):
+                """Clear any selected DepotStatus and URL assocated with
+                a mirror selection."""
+
+                self.ds = None
+                self.url = None
+
         def _del_hash(self, hash):
                 """Given the supplied content hash, remove the entry
                 from the flist's dictionary and adjust the counters
@@ -186,16 +213,29 @@
 
                 nfiles = self._get_nfiles()
                 nbytes = self._get_nbytes()
+                chosen_mirrors = set()
+                ts = 0
 
                 while files_extracted == 0:
                         try:
+                                self._pick_mirror(chosen_mirrors)
+                                ts = time.time()
+
                                 fe = self._get_files()
                                 files_extracted += fe
-                        except TransferTimedOutException:
+
+                        except (TransferTimedOutException,
+                            TransferContentException, InvalidContentException):
+
                                 retry_count -= 1
+                                self.ds.record_error()
+                                self._clear_mirror()
 
                                 if retry_count <= 0:
-                                        raise
+                                        raise TransferTimedOutException
+                        else:
+                                ts = time.time() - ts
+                                self.ds.record_success(ts)
 
                 nfiles -= self._get_nfiles()
                 nbytes -= self._get_nbytes()
@@ -232,12 +272,15 @@
                 try:
                         l = self.fhash[hashval]
                 except KeyError:
-                        # If the key isn't in the dictionary, the server
-                        # sent us a file we didn't ask for.  In this
-                        # case, we can't create an opener for it, so just
-                        # leave it in the cache.
+                        # If the key isn't in the dictionary, the server sent us
+                        # a file we didn't ask for.  In this case, we can't
+                        # create an opener for it, nor should we leave it in the
+                        # cache.
+                        os.remove(final_path)
                         return
 
+                self._verify_content(l[0], final_path)
+
                 for action in l:
                         action.data = self._make_opener(final_path)
 
@@ -264,10 +307,7 @@
                 tar_stream = None
                 files_extracted = 0
 
-                authority, pkg_name, version = self.fmri.tuple()
-                authority = pkg.fmri.strip_auth_pfx(authority)
-                url_prefix = self.image.get_url_by_authority(authority)
-                ssl_tuple = self.image.get_ssl_credentials(authority)
+                url_prefix = self.url
 
                 download_dir = self.image.incoming_download_dir()
                 # Make sure the download directory is there before we start
@@ -288,20 +328,21 @@
 
                 try:
                         f, v = versioned_urlopen(url_prefix, "filelist", [0],
-                            data = req_str, ssl_creds = ssl_tuple,
+                            data = req_str, ssl_creds = self.ssl_tuple,
                             imgtype = self.image.type)
                 except RuntimeError:
                         raise FileListException, "No server-side support" 
-                except urllib2.HTTPError:
-                        # This is stupidest class hierarchy ever.
+                except urllib2.HTTPError, e:
+                        # Must check for HTTPError before URLError
+                        if e.code == httplib.REQUEST_TIMEOUT:
+                                raise TransferTimedOutException
                         raise
                 except urllib2.URLError, e:
                         if len(e.args) == 1 and \
                             isinstance(e.args[0], socket.timeout):
                                 self.image.cleanup_downloads()
                                 raise TransferTimedOutException
-                        else:
-                                raise
+                        raise
 
                 # Exception handling here is a bit complicated.  The finally
                 # block makes sure we always close our file objects.  If we get
@@ -319,6 +360,9 @@
                         except socket.timeout:
                                 self.image.cleanup_downloads()
                                 raise TransferTimedOutException
+                        except ReadError:
+                                raise TransferContentException
+
                 finally:
                         if tar_stream:
                                 tar_stream.close()
@@ -353,6 +397,43 @@
                         return f
                 return opener                                
 
+        def _pick_mirror(self, chosen_set=None):
+                """If we don't already have a DepotStatus or a URL,
+                select a mirror, populate the DepotStatus, and choose a URL."""
+
+                if self.ds and self.url:
+                        return
+                elif self.ds:
+                        self.url = self.ds.url
+                else:
+                        self.ds = self.image.select_mirror(self.authority,
+                            chosen_set)
+                        self.url = self.ds.url
+                        chosen_set.add(self.ds)
+
+        @staticmethod
+        def _verify_content(action, filepath):
+                """If action contains an attribute that has the compressed
+                hash, read the file specified in filepath and verify
+                that the hash values match.  If the values do not match,
+                remove the file and raise an InvalidContentException."""
+
+                chash = action.attrs.get("chash", None)
+                if not chash:
+                        return
+
+                cfile = open(filepath, "rb")
+                cdata = cfile.read()
+                cfile.close()
+                hashobj = sha.new(cdata)
+                newhash = hashobj.hexdigest()
+                cdata = None
+
+                if chash != newhash:
+                       os.remove(filepath)
+                       raise InvalidContentException(action, newhash)
+
+
 class FileListException(Exception):
         def __init__(self, args=None):
                 self.args = args
--- a/src/modules/client/image.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/client/image.py	Fri Aug 15 14:03:49 2008 -0700
@@ -32,6 +32,7 @@
 import httplib
 import shutil
 import time
+import operator
 
 from pkg.misc import msg, emsg
 
@@ -306,6 +307,56 @@
 
                 return o.rstrip("/")
 
+        def select_mirror(self, auth = None, chosen_set = None):
+                """For the given authority, look through the status of
+                the mirrors.  Pick the best one.  This method returns
+                a DepotStatus object or None.  The chosen_set argument
+                contains a set object that lists the mirrors that were
+                previously chosen.  This allows us to choose both
+                by depot status statistics and ensures we don't
+                always pick the same depot."""
+
+                if auth == None:
+                        auth = self.cfg_cache.preferred_authority
+                try:
+                        slst = self.cfg_cache.mirror_status[auth]
+                except KeyError:
+                        # If the authority that we're trying to get no longer
+                        # exists, fall back to preferred authority.
+                        auth = self.cfg_cache.preferred_authority
+                        slst = self.cfg_cache.mirror_status[auth]
+
+                if len(slst) == 0:
+                        if auth in self.cfg_cache.authority_status:
+                                return self.cfg_cache.authority_status[auth]
+                        else:
+                                return None
+
+                # Choose mirror with fewest errors.
+                # If mirrors have same number of errors, choose mirror
+                # with smaller number of good transactions.  Assume it's
+                # being underused, not high-latency.
+                #
+                # XXX Will need to revisit the above assumption.
+                def cmp_depotstatus(a, b):
+                        res = cmp(a.errors, b.errors)
+                        if res == 0:
+                                return cmp(a.good_tx, b.good_tx)
+                        return res                        
+
+                slst.sort(cmp = cmp_depotstatus)
+
+                if chosen_set and len(chosen_set) == len(slst):
+                        chosen_set.clear()
+                        chosen_set = None
+
+                if chosen_set and slst[0] in chosen_set:
+                        for ds in slst:
+                                if ds not in chosen_set:
+                                        return ds
+
+                return slst[0]
+
         def get_ssl_credentials(self, authority = None, origin = None):
                 """Return a tuple containing (ssl_key, ssl_cert) for the
                 specified authority prefix.  If the authority isn't specified,
@@ -367,7 +418,7 @@
                                 update_dt = catalog.ts_to_datetime(update_dt)
 
                 return (prefix, auth["origin"], auth["ssl_key"],
-                    auth["ssl_cert"], update_dt)
+                    auth["ssl_cert"], update_dt, auth["mirrors"])
 
         def set_preferred_authority(self, auth_name):
                 if not self.has_authority(auth_name):
@@ -376,7 +427,7 @@
                 self.cfg_cache.write("%s/cfg_cache" % self.imgdir)
 
         def set_authority(self, auth_name, origin_url = None, ssl_key = None,
-            ssl_cert = None, mirrors = []):
+            ssl_cert = None):
 
                 auths = self.cfg_cache.authorities
 
@@ -390,19 +441,40 @@
                                 auths[auth_name]["ssl_key"] = ssl_key
                         if ssl_cert:
                                 auths[auth_name]["ssl_cert"] = ssl_cert
-                        if mirrors:
-                                auths[auth_name]["mirrors"] = mirrors
 
                 else:
                         auths[auth_name] = {}
                         auths[auth_name]["prefix"] = auth_name
                         auths[auth_name]["origin"] = \
                             misc.url_affix_trailing_slash(origin_url)
-                        auths[auth_name]["mirrors"] = mirrors
+                        auths[auth_name]["mirrors"] = []
                         auths[auth_name]["ssl_key"] = ssl_key
                         auths[auth_name]["ssl_cert"] = ssl_cert
 
                 self.cfg_cache.write("%s/cfg_cache" % self.imgdir)
+                        
+        def add_mirror(self, auth_name, mirror):
+                """Add the mirror URL contained in mirror to
+                auth_name's list of mirrors."""
+
+                auths = self.cfg_cache.authorities
+                auths[auth_name]["mirrors"].append(mirror)
+                self.cfg_cache.write("%s/cfg_cache" % self.imgdir)
+
+        def has_mirror(self, auth_name, url):
+                """Returns true if url is in auth_name's list of mirrors."""
+
+                return url in self.cfg_cache.authorities[auth_name]["mirrors"]
+
+        def del_mirror(self, auth_name, mirror):
+                """Remove the mirror URL contained in mirror from
+                auth_name's list of mirrors."""
+
+                auths = self.cfg_cache.authorities
+
+                if mirror in self.cfg_cache.authorities[auth_name]["mirrors"]:
+                        auths[auth_name]["mirrors"].remove(mirror)
+                        self.cfg_cache.write("%s/cfg_cache" % self.imgdir)
 
         def verify(self, fmri, progresstracker, **args):
                 """generator that returns any errors in installed pkgs
--- a/src/modules/client/imageconfig.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/client/imageconfig.py	Fri Aug 15 14:03:49 2008 -0700
@@ -29,6 +29,41 @@
 import pkg.misc as misc
 from pkg.misc import msg
 
+class DepotStatus(object):
+        """An object that encapsulates status about a depot server.
+        This includes things like observed performance, availability,
+        successful and unsuccessful transaction rates, etc."""
+
+        def __init__(self, authority, url):
+                """Authority is the authority prefix for this depot.
+                Url is the URL that names the server or mirror itself."""
+
+                self.auth = authority
+                self.url = url.rstrip("/")
+                self.available = True
+
+                self.errors = 0
+                self.good_tx = 0
+
+                self.last_tx_time = None
+
+        def record_error(self):
+
+                self.errors += 1
+
+        def record_success(self, tx_time):
+
+                self.good_tx += 1
+                self.last_tx_time = tx_time
+
+        def set_available(self, avail):
+
+                if avail:
+                        self.available = True
+                else:
+                        self.available = False
+
+
 class ImageConfig(object):
         """An ImageConfig object is a collection of configuration information:
         URLs, authorities, policies, etc. that allow an Image to operate."""
@@ -40,8 +75,16 @@
         # XXX Use of ConfigParser is convenient and at most speculative--and
         # definitely not interface.
 
+        BOOLEAN_POLICIES = [
+                "require-optional",
+                "pursue-latest",
+                "display-copyrights",
+                "flush-content-cache-on-success" ]       
+
         def __init__(self):
                 self.authorities = {}
+                self.authority_status = {}
+                self.mirror_status = {}
                 self.preferred_authority = None
                 self.flush_content_cache = False
                 self.filters = {}
@@ -74,6 +117,7 @@
                                 # authority block has prefix, origin, and
                                 # mirrors
                                 a = {}
+                                ms = []
 
                                 k = cp.get(s, "prefix")
 
@@ -83,7 +127,9 @@
 
                                 a["prefix"] = k
                                 a["origin"] = cp.get(s, "origin")
-                                a["mirrors"] = cp.get(s, "mirrors")
+                                a["mirrors"] = self.read_list(
+                                    cp.get(s, "mirrors"))
+
                                 try:
                                         a["ssl_key"] = cp.get(s, "ssl_key")
                                 except ConfigParser.NoOptionError:
@@ -98,31 +144,37 @@
                                     misc.url_affix_trailing_slash(a["origin"])
 
                                 self.authorities[k] = a
+                                self.authority_status[k] = DepotStatus(k,
+                                    a["origin"])
 
+                                for mirror in a["mirrors"]:
+                                        ms.append(DepotStatus(k, mirror))
+                                ms.append(self.authority_status[k])
+
+                                self.mirror_status[k] = ms
+                                
                                 if self.preferred_authority == None:
                                         self.preferred_authority = k
 
-                        if re.match("policy", s):
-                                for o in cp.options("policy"):
+                if cp.has_section("policy"):
+                        for o in cp.options("policy"):
+                                if o in self.BOOLEAN_POLICIES:
+                                        self.policies[o] = \
+                                            cp.getboolean("policy", o)
+                                else:
                                         self.policies[o] = cp.get("policy", o)
 
-                        if re.match("filter", s):
-                                for o in cp.options("filter"):
-                                        self.filters[o] = cp.get("filter", o)
-
-                        # XXX Child images
+                if cp.has_section("filter"):
+                        for o in cp.options("filter"):
+                                self.filters[o] = cp.get("filter", o)
 
                 if "preferred-authority" in self.policies:
-                        self.preferred_authority = self.policies["preferred-authority"]
+                        self.preferred_authority = \
+                            self.policies["preferred-authority"]
 
                 if "flush-content-cache-on-success" in self.policies:
-                        policystr = self.policies["flush-content-cache-on-success"]
-
-                        if policystr == "True" or policystr == "true":
-                                self.flush_content_cache = True
-                        else:
-                                self.flush_content_cache = False
-
+                        self.flush_content_cache = \
+                            self.policies["flush-content-cache-on-success"]
 
         def write(self, path):
                 cp = ConfigParser.SafeConfigParser()
@@ -165,6 +217,23 @@
         def delete_authority(self, auth):
                 del self.authorities[auth]
 
+        @staticmethod
+        def read_list(str):
+                """Take a list in string representation and convert it back
+                to a Python list."""
+                
+                # Strip brackets and any whitespace
+                str = str.strip("][ ")
+                # Strip comma and any whitespeace
+                lst = str.split(", ")
+                # Strip empty whitespace, single, and double quotation marks
+                lst = [ s.strip("' \"") for s in lst ]
+                # Eliminate any empty strings
+                lst = [ s for s in lst if s != '' ]
+
+                return lst
+ 
+
 if __name__ == "__main__":
         # XXX Need to construct a trivial configuration, load it, and verify
         # correctness.
--- a/src/modules/client/retrieve.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/client/retrieve.py	Fri Aug 15 14:03:49 2008 -0700
@@ -25,6 +25,7 @@
 
 import socket
 import urllib2
+import httplib
 
 import pkg.fmri
 from pkg.misc import versioned_urlopen
@@ -73,6 +74,9 @@
                     fmri.get_url_path(), ssl_creds = ssl_tuple,
                     imgtype = img.type)
         except urllib2.HTTPError, e:
+                if e.code == httplib.REQUEST_TIMEOUT:
+                        raise TransferTimedOutException
+
                 raise NameError, "could not retrieve manifest '%s' from '%s'" % \
                     (fmri.get_url_path(), url_prefix)
         except urllib2.URLError, e:
--- a/src/modules/depotcontroller.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/depotcontroller.py	Fri Aug 15 14:03:49 2008 -0700
@@ -53,6 +53,7 @@
                 self.__readonly = False
                 self.__rebuild = False
                 self.__refresh_index = False
+                self.__mirror = False
                 self.__logpath = "/tmp/depot.log"
                 self.__output = None
                 self.__depot_handle = None
@@ -84,6 +85,12 @@
         def set_readwrite(self):
                 self.__readonly = False
 
+        def set_mirror(self):
+                self.__mirror = True
+
+        def unset_mirror(self):
+                self.__mirror = False
+
         def set_rebuild(self):
                 self.__rebuild = True
 
@@ -152,6 +159,8 @@
                         args.append("--readonly")
                 if self.__rebuild:
                         args.append("--rebuild")
+                if self.__mirror:
+                        args.append("--mirror")
                 if self.__refresh_index:
                         args.append("--refresh-index")
                 return args
--- a/src/modules/misc.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/misc.py	Fri Aug 15 14:03:49 2008 -0700
@@ -365,6 +365,20 @@
         def __init__(self, args = None):
                 self.args = args
 
+class TransferContentException(Exception):
+        def __init__(self, args = None):
+                self.args = args
+
+class InvalidContentException(Exception):
+        def __init__(self, action, hashval):
+                self.action = action
+                self.hashval = hashval
+
+        def __str__(self):
+                str = "Action with path %s should have hash %s. Computed hash %s instead." % \
+                    (self.action.attrs["path"], self.action.attrs["chash"], 
+                    self.hashval)
+                return str
 
 # Default maximum memory useage during indexing
 # This is a soft cap since memory usage is estimated.
@@ -377,3 +391,4 @@
 except:
         CLIENT_DEFAULT_MEM_USE_KB = 100
         SERVER_DEFAULT_MEM_USE_KB = 500
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/pkggzip.py	Fri Aug 15 14:03:49 2008 -0700
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+
+import sys
+import struct
+import gzip
+
+def out32u(outf, val):
+        outf.write(struct.pack("<L", val))
+
+class PkgGzipFile(gzip.GzipFile):
+        """This is a version of GzipFile that does not include a file
+        pathname or timestamp in the gzip header.  This allows us to get
+        deterministic gzip files on compression, so that we can reliably
+        use a cryptopgraphic hash on the compressed content."""
+
+        def __init__(self, filename=None, mode=None, compresslevel=9,
+            fileobj=None):
+
+               gzip.GzipFile.__init__(self, filename, mode, compresslevel,
+                    fileobj) 
+
+        def _write_gzip_header(self):
+                self.fileobj.write("\037\213")
+                self.fileobj.write("\010")
+                self.fileobj.write(chr(0))
+                out32u(self.fileobj, long(0))
+                self.fileobj.write("\002")
+                self.fileobj.write("\377")
--- a/src/modules/server/config.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/server/config.py	Fri Aug 15 14:03:49 2008 -0700
@@ -49,6 +49,7 @@
                 self.authority = authority
 
                 self.read_only = False
+                self.mirror = False
 
                 self.in_flight_trans = {}
 
@@ -104,9 +105,15 @@
         def set_read_only(self):
                 self.read_only = True
 
+        def set_mirror(self):
+                self.mirror = True
+
         def is_read_only(self):
                 return self.read_only
 
+        def is_mirror(self):
+                return self.mirror
+
         def acquire_in_flight(self):
                 """Walk trans_root, acquiring valid transaction IDs."""
                 tree = os.walk(self.trans_root)
@@ -124,6 +131,9 @@
                 """Tell the catalog to set itself up.  Associate an
                 instance of the catalog with this depot."""
 
+                if self.is_mirror():
+                        return
+
                 if rebuild == None:
                         rebuild = not self.read_only
                 
--- a/src/modules/server/repository.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/server/repository.py	Fri Aug 15 14:03:49 2008 -0700
@@ -64,6 +64,32 @@
             theory) to instantiate and manage multiple repositories for a single
             depot process. """
 
+        REPO_OPS_DEFAULT = [
+                "versions",
+                "search",
+                "catalog",
+                "manifest",
+                "filelist",
+                "rename",
+                "file",
+                "open",
+                "close",
+                "abandon",
+                "add" ]
+
+        REPO_OPS_READONLY = [
+                "versions",
+                "search",
+                "catalog",
+                "manifest",
+                "filelist",
+                "file" ]
+
+        REPO_OPS_MIRROR = [
+                "versions",
+                "filelist",
+                "file" ]
+
         def __init__(self, scfg):
                 """ Initialise and map the valid operations for the repository.
                     While doing so, ensure that the operations have been
@@ -96,6 +122,13 @@
 
                 self.vops = {}
 
+                if scfg.is_mirror():
+                        self.ops_list = self.REPO_OPS_MIRROR
+                elif scfg.is_read_only():
+                        self.ops_list = self.REPO_OPS_READONLY
+                else:
+                        self.ops_list = self.REPO_OPS_DEFAULT
+
                 # cherrypy has a special handler for favicon, and so we must
                 # setup an instance-level handler instead of just updating
                 # its configuration information.
@@ -109,16 +142,14 @@
                         if not m:
                                 continue
 
-                        # We only want to allow operations we have explicitly
-                        # exposed.  cherrypy will handle this automatically,
-                        # but this prevents a bad operator from being put
-                        # into vops.
-                        assert getattr(func, 'exposed', False), "Unable to " \
-                            "map operation %s; not explicitly exposed." % name
-
                         op = m.group(1)
                         ver = m.group(2)
 
+                        if op not in self.ops_list:
+                                continue
+
+                        func.__dict__['exposed'] = True
+
                         if op in self.vops:
                                 self.vops[op].append(ver)
                         else:
@@ -141,11 +172,14 @@
                     object will be handled by the "externally facing" server
                     code instead. """
 
-                operation = None
+                op = None
                 if len(tokens) > 0:
-                        operation = tokens[0]
+                        op = tokens[0]
 
-                if operation not in self.vops:
+                if op in self.REPO_OPS_DEFAULT and op not in self.vops:
+                        raise cherrypy.HTTPError(httplib.NOT_FOUND,
+                            "Operation not supported in current server mode.")
+                elif op not in self.vops:
                         request = cherrypy.request
                         response = cherrypy.response
                         if face.match(self.scfg, self.rcfg, request, response):
@@ -171,7 +205,6 @@
                     % (version, operation)
                 raise cherrypy.HTTPError(httplib.NOT_FOUND, msg)
 
-        @cherrypy.expose
         @cherrypy.tools.response_headers(headers = \
             [('Content-Type','text/plain')])
         def versions_0(self, *tokens):
@@ -184,7 +217,6 @@
                 ) + "\n"
                 return versions
 
-        @cherrypy.expose
         def search_0(self, *tokens):
                 """ Based on the request path, return a list of packages that
                     match the specified criteria. """
@@ -216,7 +248,6 @@
 
                 return output
 
-        @cherrypy.expose
         def catalog_0(self, *tokens):
                 """ Provide an incremental update or full version of the
                     catalog as appropriate to the requesting client. """
@@ -240,7 +271,6 @@
 
         catalog_0._cp_config = { 'response.stream': True }
 
-        @cherrypy.expose
         def manifest_0(self, *tokens):
                 """ The request is an encoded pkg FMRI.  If the version is
                     specified incompletely, we return an error, as the client
@@ -284,7 +314,6 @@
 
                         cherrypy.request.tar_stream = None
 
-        @cherrypy.expose
         def filelist_0(self, *tokens, **params):
                 """ Request data contains application/x-www-form-urlencoded
                     entries with the requested filenames.  The resulting tar
@@ -321,6 +350,10 @@
                                     self.scfg.file_root,
                                     misc.hash_file_name(v)))
 
+                                # If file isn't here, skip it
+                                if not os.path.exists(filepath):
+                                        continue
+
                                 tar_stream.add(filepath, v, False)
 
                                 self.scfg.inc_flist_files()
@@ -351,7 +384,6 @@
                     'application/data')]
         }
 
-        @cherrypy.expose
         def rename_0(self, *tokens, **params):
                 """ Renames an existing package specified by Src-FMRI to
                     Dest-FMRI.  Returns no output. """
@@ -391,7 +423,6 @@
 
                 self.scfg.inc_renamed()
 
-        @cherrypy.expose
         def file_0(self, *tokens):
                 """ The request is the SHA-1 hash name for the file.  The
                     contents of the file is output directly to the client. """
@@ -406,7 +437,6 @@
                     self.scfg.file_root, misc.hash_file_name(fhash))),
                     'application/data')
 
-        @cherrypy.expose
         def open_0(self, *tokens):
                 """ Starts a transaction for the package name specified in the
                     request path.  Returns no output."""
@@ -415,9 +445,6 @@
 
                 # XXX Authentication will be handled by virtue of possessing a
                 # signed certificate (or a more elaborate system).
-                if self.scfg.is_read_only():
-                        raise cherrypy.HTTPError(httplib.FORBIDDEN,
-                            "Read-only server")
 
                 t = trans.Transaction()
                 ret = t.open(self.scfg, *tokens)
@@ -430,15 +457,10 @@
                 else:
                         raise cherrypy.HTTPError(httplib.INTERNAL_SERVER_ERROR)
 
-        @cherrypy.expose
         def close_0(self, *tokens):
                 """ Ends an in-flight transaction for the Transaction ID
                     specified in the request path.  Returns no output. """
 
-                if self.scfg.is_read_only():
-                        raise cherrypy.HTTPError(httplib.FORBIDDEN,
-                            "Read-only server")
-
                 try:
                         # cherrypy decoded it, but we actually need it encoded.
                         trans_id = urllib.quote("%s" % tokens[0], "")
@@ -454,14 +476,10 @@
                         t.close()
                         del self.scfg.in_flight_trans[trans_id]
 
-        @cherrypy.expose
         def abandon_0(self, *tokens):
                 """ Aborts an in-flight transaction for the Transaction ID
                     specified in the request path.  Returns no output. """
 
-                if self.scfg.is_read_only():
-                        raise cherrypy.HTTPError(httplib.FORBIDDEN,
-                            "Read-only server")
                 try:
                         # cherrypy decoded it, but we actually need it encoded.
                         trans_id = urllib.quote("%s" % tokens[0], "")
@@ -477,17 +495,12 @@
                         t.abandon()
                         del self.scfg.in_flight_trans[trans_id]
 
-        @cherrypy.expose
         def add_0(self, *tokens, **params):
                 """ Adds content to an in-flight transaction for the
                     Transaction ID specified in the request path.  The content
                     is expected to be in the request body.  Returns no
                     output. """
 
-                if self.scfg.is_read_only():
-                        raise cherrypy.HTTPError(httplib.FORBIDDEN,
-                            "Read-only server")
-
                 try:
                         # cherrypy decoded it, but we actually need it encoded.
                         trans_id = urllib.quote("%s" % tokens[0], "")
--- a/src/modules/server/transaction.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/modules/server/transaction.py	Fri Aug 15 14:03:49 2008 -0700
@@ -24,7 +24,6 @@
 
 import cherrypy
 import errno
-import gzip
 import httplib
 import logging
 import os
@@ -35,11 +34,13 @@
 import datetime
 import calendar
 import urllib
+import stat
 
 import pkg.actions
 import pkg.fmri as fmri
 import pkg.misc as misc
 import pkg.portable as portable
+from pkg.pkggzip import PkgGzipFile
 
 try:
         import pkg.elf as elf
@@ -258,7 +259,9 @@
                         fhash = sha.new(data)
                         fname = fhash.hexdigest()
                         action.hash = fname
-                        ofile = gzip.GzipFile("%s/%s" % (self.dir, fname), "wb")
+
+                        opath = os.path.join(self.dir, fname)
+                        ofile = PkgGzipFile(opath, "wb")
 
                         bufsz = 64 * 1024
 
@@ -273,6 +276,28 @@
                         ofile.write(data[m:])
                         ofile.close()
 
+                        data = None
+                
+                        # Now that the file has been compressed, determine its
+                        # size and store that as an attribute in the manifest
+                        # for the file.
+                        fs = os.stat(opath)
+                        action.attrs["pkg.csize"] = str(fs[stat.ST_SIZE])
+
+                        # Compute the SHA hash of the compressed file.
+                        # Store this as the chash attribute of the file's
+                        # action.  In order for this to work correctly, we
+                        # have to use the PkgGzipFile class.  It omits
+                        # filename and timestamp information from the gzip
+                        # header, allowing us to generate deterministic
+                        # hashes for different files with identical content.
+                        cfile = open(opath, "rb")
+                        cdata = cfile.read()
+                        cfile.close()
+                        chash = sha.new(cdata)
+                        action.attrs["chash"] = chash.hexdigest()
+                        cdata = None
+
                 tfile = file("%s/manifest" % self.dir, "a")
                 print >> tfile, action
                 tfile.close()
--- a/src/tests/baseline.txt	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/tests/baseline.txt	Fri Aug 15 14:03:49 2008 -0700
@@ -160,6 +160,8 @@
 cli.t_commandline.py TestCommandLine.test_pkg_missing_args|pass
 cli.t_commandline.py TestCommandLine.test_pkg_vq_1153|pass
 cli.t_commandline.py TestCommandLine.test_pkgsend_bogus_opts|pass
+cli.t_commandline.py TestCommandLine.test_mirror|pass
+cli.t_commandline.py TestCommandLine.test_mirror_longopt|pass
 cli.t_depot.py TestDepot.testStartStop|pass
 cli.t_depot.py TestDepot.test_bug_1876|pass
 cli.t_depot.py TestDepot.test_depot_ping|pass
--- a/src/tests/cli/t_commandline.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/tests/cli/t_commandline.py	Fri Aug 15 14:03:49 2008 -0700
@@ -153,5 +153,56 @@
                 self.pkg("set-authority -O http://test1:abcde test2", exit=1)
                 self.pkg("set-authority -O ftp://test2 test2", exit=1)
 
+        def test_mirror(self):
+                """Test set-mirror and unset-mirror."""
+                durl = self.dc.get_depot_url()
+                pfx = "mtest"
+                self.image_create(durl, prefix = pfx)
+
+                self.pkg("set-authority -m http://test1 mtest")
+                self.pkg("set-authority -m http://test2.test.com mtest")
+                self.pkg("set-authority -m http://test5", exit=2)
+                self.pkg("set-authority -m mtest", exit=2)
+                self.pkg("set-authority -m http://test1 mtest", exit=1)
+                self.pkg("set-authority -m http://test5 test", exit=1)
+                self.pkg("set-authority -m test7 mtest", exit=1)
+
+                self.pkg("set-authority -M http://test1 mtest")
+                self.pkg("set-authority -M http://test2.test.com mtest")
+                self.pkg("set-authority -M mtest http://test2 http://test4",
+                    exit=2)
+                self.pkg("set-authority -M http://test5", exit=2)
+                self.pkg("set-authority -M mtest", exit=2)
+                self.pkg("set-authority -M http://test5 test", exit=1)
+                self.pkg("set-authority -M http://test6 mtest", exit=1)
+                self.pkg("set-authority -M test7 mtest", exit=1)
+
+        def test_mirror_longopt(self):
+                """Test set-mirror and unset-mirror."""
+                durl = self.dc.get_depot_url()
+                pfx = "mtest"
+                self.image_create(durl, prefix = pfx)
+
+                self.pkg("set-authority --add-mirror=http://test1 mtest")
+                self.pkg("set-authority --add-mirror=http://test2.test.com mtest")
+                self.pkg("set-authority --add-mirror=http://test5", exit=2)
+                self.pkg("set-authority --add-mirror mtest", exit=2)
+                self.pkg("set-authority --add-mirror=http://test1 mtest",
+                    exit=1)
+                self.pkg("set-authority --add-mirror=http://test5 test", exit=1)
+                self.pkg("set-authority --add-mirror=test7 mtest", exit=1)
+
+                self.pkg("set-authority --remove-mirror=http://test1 mtest")
+                self.pkg("set-authority --remove-mirror=http://test2.test.com mtest")
+                self.pkg("set-authority --remove-mirror=mtest http://test2 http://test4",
+                    exit=2)
+                self.pkg("set-authority --remove-mirror=http://test5", exit=2)
+                self.pkg("set-authority --remove-mirror mtest", exit=2)
+                self.pkg("set-authority --remove-mirror=http://test5 test",
+                    exit=1)
+                self.pkg("set-authority --remove-mirror=http://test6 mtest",
+                    exit=1)
+                self.pkg("set-authority --remove-mirror=test7 mtest", exit=1)
+
 if __name__ == "__main__":
         unittest.main()
--- a/src/tests/cli/t_depotcontroller.py	Fri Aug 15 11:21:48 2008 -0700
+++ b/src/tests/cli/t_depotcontroller.py	Fri Aug 15 14:03:49 2008 -0700
@@ -95,5 +95,17 @@
 
                 self.assert_(self.__dc.start_expected_fail())
 
+                self.__dc.set_mirror()
+                self.__dc.set_rebuild()
+                self.__dc.set_norefresh_index()
+
+                self.assert_(self.__dc.start_expected_fail())
+
+                self.__dc.set_mirror()
+                self.__dc.set_norebuild()
+                self.__dc.set_refresh_index()
+
+                self.assert_(self.__dc.start_expected_fail())
+
 if __name__ == "__main__":
         unittest.main()