17515714 pkgrepo should allow some operations for corrupt repositories
authorShawn Walker <shawn.walker@oracle.com>
Fri, 27 Sep 2013 11:21:00 -0700
changeset 2990 2cc6693a7d83
parent 2989 8de3e37c0251
child 2991 75e616731cc3
17515714 pkgrepo should allow some operations for corrupt repositories
src/modules/client/api_errors.py
src/modules/server/repository.py
src/pkgrepo.py
src/tests/cli/t_pkgrepo.py
--- a/src/modules/client/api_errors.py	Wed Dec 04 10:27:27 2013 -0800
+++ b/src/modules/client/api_errors.py	Fri Sep 27 11:21:00 2013 -0700
@@ -1052,7 +1052,8 @@
         """Used to indicate a Catalog file could not be loaded."""
 
         def __str__(self):
-                return _("Catalog file '%s' is invalid.") % self.data
+                return _("Catalog file '%s' is invalid.\nUse 'pkgrepo rebuild' "
+                    "to create a new package catalog.") % self.data
 
 
 class MismatchedCatalog(CatalogError):
--- a/src/modules/server/repository.py	Wed Dec 04 10:27:27 2013 -0800
+++ b/src/modules/server/repository.py	Fri Sep 27 11:21:00 2013 -0700
@@ -322,8 +322,9 @@
         intended only for use by the Repository class.
         """
 
-        def __init__(self, file_layout=None, file_root=None, log_obj=None,
-            mirror=False, pub=None, read_only=False, root=None,
+        def __init__(self, allow_invalid=False, file_layout=None,
+            file_root=None, log_obj=None, mirror=False, pub=None,
+            read_only=False, root=None,
             sort_file_max_size=indexer.SORT_FILE_MAX_SIZE, writable_root=None):
                 """Prepare the repository for use."""
 
@@ -381,7 +382,7 @@
                 # Initialize.
                 self.__lock_rstore(blocking=True)
                 try:
-                        self.__init_state()
+                        self.__init_state(allow_invalid=allow_invalid)
                 finally:
                         self.__unlock_rstore()
 
@@ -800,7 +801,7 @@
                         self.__index_log("Search Available")
                 self.__search_available = True
 
-        def __init_state(self):
+        def __init_state(self, allow_invalid=False):
                 """Private version; caller responsible for repository
                 locking."""
 
@@ -809,7 +810,7 @@
                 self.__catalog = None
 
                 # Determine location and version of catalog data.
-                self.__init_catalog()
+                self.__init_catalog(allow_invalid=allow_invalid)
 
                 # Prepare search for use (ensuring most current data is loaded).
                 self.reset_search()
@@ -829,12 +830,17 @@
 
                 self.__check_search()
 
-        def __init_catalog(self):
+        def __init_catalog(self, allow_invalid=False):
                 """Private function to determine version and location of
                 catalog data.  This will also perform any necessary
                 transformations of existing catalog data if the repository
                 is read-only and a writable_root has been provided.
-                """
+
+                'allow_invalid', if True, will assume the catalog is version 1
+                and use an empty, in-memory catalog if the existing, on-disk
+                catalog is invalid (i.e. corrupted).  This assumes that the
+                caller intends to use the repository as part of a rebuild
+                operation."""
 
                 # Reset versions to default.
                 self.catalog_version = -1
@@ -948,8 +954,22 @@
                                 self.__set_catalog_root(v1_cat.meta_root)
                         else:
                                 self.catalog_version = 0
-                elif self.catalog.exists:
-                        self.catalog_version = 1
+                else:
+                        try:
+                                if self.catalog.exists:
+                                        self.catalog_version = 1
+                        except apx.CatalogError, e:
+                                if not allow_invalid:
+                                        raise
+
+                                # Catalog is invalid, but consumer wants to
+                                # proceed; assume version 1.  This will allow
+                                # pkgrepo rebuild, etc.
+                                self.__log(str(e))
+                                self.__catalog = catalog.Catalog(
+                                    read_only=self.read_only)
+                                self.catalog_verison = 1
+                                return
 
                 if self.catalog_version >= 1 and not self.publisher:
                         # If there's no information available to determine
@@ -2550,9 +2570,9 @@
         """A Repository object is a representation of data contained within a
         pkg(5) repository and an interface to manipulate it."""
 
-        def __init__(self, cfgpathname=None, create=False, file_root=None,
-            log_obj=None, mirror=False, properties=misc.EmptyDict,
-            read_only=False, root=None,
+        def __init__(self, allow_invalid=False, cfgpathname=None, create=False,
+            file_root=None, log_obj=None, mirror=False,
+            properties=misc.EmptyDict, read_only=False, root=None,
             sort_file_max_size=indexer.SORT_FILE_MAX_SIZE, writable_root=None):
                 """Prepare the repository for use."""
 
@@ -2592,11 +2612,13 @@
 
                 self.__lock_repository()
                 try:
-                        self.__init_state(create=create, properties=properties)
+                        self.__init_state(allow_invalid=allow_invalid,
+                            create=create, properties=properties)
                 finally:
                         self.__unlock_repository()
 
-        def __init_format(self, create=False, properties=misc.EmptyI):
+        def __init_format(self, allow_invalid=False, create=False,
+            properties=misc.EmptyI):
                 """Private helper function to determine repository format and
                 validity.
                 """
@@ -2729,18 +2751,21 @@
                         # ...and then one for each publisher if any are known.
                         if self.pub_root and os.path.exists(self.pub_root):
                                 for pub in os.listdir(self.pub_root):
-                                        self.__new_rstore(pub)
+                                        self.__new_rstore(pub,
+                                            allow_invalid=allow_invalid)
 
                         # If a default publisher is set, ensure that a storage
                         # object always exists for it.
                         if def_pub and def_pub not in self.__rstores:
-                                self.__new_rstore(def_pub)
+                                self.__new_rstore(def_pub,
+                                    allow_invalid=allow_invalid)
                 else:
                         # For older repository versions, there is only one
                         # repository store, and it might have an associated
                         # publisher prefix.  (This might be in a mix of V0 and
                         # V1 layouts.)
-                        rstore = _RepoStore(file_root=self.file_root,
+                        rstore = _RepoStore(allow_invalid=allow_invalid,
+                            file_root=self.file_root,
                             log_obj=self.log_obj, pub=def_pub,
                             mirror=self.mirror,
                             read_only=self.read_only,
@@ -2780,14 +2805,16 @@
                                         # matter.
                                         continue
 
-        def __init_state(self, create=False, properties=misc.EmptyDict):
+        def __init_state(self, allow_invalid=False, create=False,
+            properties=misc.EmptyDict):
                 """Private helper function to initialize state."""
 
                 # Discard current repository storage state data.
                 self.__rstores = {}
 
                 # Determine format, configuration location, and validity.
-                self.__init_format(create=create, properties=properties)
+                self.__init_format(allow_invalid=allow_invalid, create=create,
+                    properties=properties)
 
                 # Ensure default configuration is written.
                 self.__write_config()
@@ -2873,7 +2900,7 @@
                             errno.EROFS):
                                 raise
 
-        def __new_rstore(self, pub):
+        def __new_rstore(self, pub, allow_invalid=False):
                 assert pub
                 if pub in self.__rstores:
                         raise RepositoryDuplicatePublisher(pub)
@@ -2905,7 +2932,8 @@
                         # might use a mix of layouts.
                         file_layout = layout.V1Layout()
 
-                rstore = _RepoStore(file_layout=file_layout, file_root=froot,
+                rstore = _RepoStore(allow_invalid=allow_invalid,
+                    file_layout=file_layout, file_root=froot,
                     log_obj=self.log_obj, mirror=self.mirror, pub=pub,
                     read_only=self.read_only, root=root,
                     sort_file_max_size=self.__sort_file_max_size,
--- a/src/pkgrepo.py	Wed Dec 04 10:27:27 2013 -0800
+++ b/src/pkgrepo.py	Fri Sep 27 11:21:00 2013 -0700
@@ -251,8 +251,11 @@
         return EXIT_OK
 
 
-def get_repo(conf, read_only=True, subcommand=None):
-        """Return the repository object for current program configuration."""
+def get_repo(conf, allow_invalid=False, read_only=True, subcommand=None):
+        """Return the repository object for current program configuration.
+
+        'allow_invalid' specifies whether potentially corrupt repositories are
+        allowed; should only be True if performing a rebuild operation."""
 
         repo_uri = conf["repo_uri"]
         if repo_uri.scheme != "file":
@@ -263,7 +266,8 @@
         if not path:
                 # Bad URI?
                 raise sr.RepositoryInvalidError(str(repo_uri))
-        return sr.Repository(read_only=read_only, root=path)
+        return sr.Repository(allow_invalid=allow_invalid, read_only=read_only,
+            root=path)
 
 
 def setup_transport(conf, subcommand=None, verbose=False, ssl_key=None,
@@ -1022,6 +1026,61 @@
         return EXIT_OK
 
 
+def __rebuild_local(subcommand, conf, pubs, build_catalog, build_index):
+        """In an attempt to allow operations on potentially corrupt
+        repositories, 'local' repositories (filesystem-basd ones) are handled
+        separately."""
+
+        repo = get_repo(conf, allow_invalid=build_catalog, read_only=False,
+            subcommand=subcommand)
+
+        rpubs = set(repo.publishers)
+        if not pubs:
+                found = rpubs
+        else:
+                found = rpubs & pubs
+        notfound = pubs - found
+
+        rval = EXIT_OK
+        if found and notfound:
+                rval = EXIT_PARTIAL
+        elif pubs and not found:
+                error(_("no matching publishers found"), cmd=subcommand)
+                return EXIT_OOPS
+
+        logger.info("Initiating repository rebuild.")
+        for pfx in found:
+                repo.rebuild(build_catalog=build_catalog,
+                    build_index=build_index, pub=pfx)
+
+        return rval
+
+
+def __rebuild_remote(subcommand, conf, pubs, key, cert, build_catalog,
+    build_index):
+        def do_rebuild(xport, xpub):
+                if build_catalog and build_index:
+                        xport.publish_rebuild(xpub)
+                elif build_catalog:
+                        xport.publish_rebuild_packages(xpub)
+                elif build_index:
+                        xport.publish_rebuild_indexes(xpub)
+
+        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
+            ssl_key=key, ssl_cert=cert)
+        rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
+            xpub)
+        if rval == EXIT_OOPS:
+                return rval
+
+        logger.info("Initiating repository rebuild.")
+        for pfx in found:
+                xpub.prefix = pfx
+                do_rebuild(xport, xpub)
+
+        return rval
+
+
 def subcmd_rebuild(conf, args):
         """Rebuild the repository's catalog and index data (as permitted)."""
 
@@ -1063,27 +1122,12 @@
                 usage(_("A package repository location must be provided "
                     "using -s."), cmd=subcommand)
 
-        def do_rebuild(xport, xpub):
-                if build_catalog and build_index:
-                        xport.publish_rebuild(xpub)
-                elif build_catalog:
-                        xport.publish_rebuild_packages(xpub)
-                elif build_index:
-                        xport.publish_rebuild_indexes(xpub)
+        if conf["repo_uri"].scheme == "file":
+                return __rebuild_local(subcommand, conf, pubs, build_catalog,
+                    build_index)
 
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
-            ssl_key=key, ssl_cert=cert)
-        rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
-            xpub)
-        if rval == EXIT_OOPS:
-                return rval
-
-        logger.info("Initiating repository rebuild.")
-        for pfx in found:
-                xpub.prefix = pfx
-                do_rebuild(xport, xpub)
-
-        return rval
+        return __rebuild_remote(subcommand, conf, pubs, key, cert,
+            build_catalog, build_index)
 
 
 def subcmd_refresh(conf, args):
--- a/src/tests/cli/t_pkgrepo.py	Wed Dec 04 10:27:27 2013 -0800
+++ b/src/tests/cli/t_pkgrepo.py	Fri Sep 27 11:21:00 2013 -0700
@@ -928,6 +928,32 @@
                 self.assertEqualDiff([pfmri],
                     [str(f) for f in repo.get_catalog("test").fmris()])
  
+                # Now verify that 'pkgrepo rebuild' will still work
+                # (filesystem-based repos only) if the catalog is corrupted.
+                cat = repo.get_catalog("test")
+                part = cat.get_part("catalog.attrs")
+                apath = part.pathname
+
+                with open(apath, "r+b") as cfile:
+                        cfile.truncate(4)
+                        cfile.close()
+
+                # Should fail, since catalog is corrupt.
+                self.pkgrepo("refresh -s %s" % repo_path, exit=1)
+
+                # Should fail, because --no-catalog was specified.
+                self.pkgrepo("rebuild -s %s --no-catalog" % repo_path, exit=1)
+
+                # Should succeed.
+                self.pkgrepo("rebuild -s %s --no-index" % repo_path)
+
+                # Should succeed now that catalog is valid.
+                self.pkgrepo("refresh -s %s" % repo_path)
+
+                # Verify expected package is still known.
+                self.assertEqualDiff([pfmri],
+                    [str(f) for f in repo.get_catalog("test").fmris()])
+
         def __test_refresh(self, repo_path, repo_uri):
                 """Private function to verify refresh subcommand behaviour."""