upstream/oracle/pkg-gate: comparison src/modules/p5p.py

equal deleted inserted replaced

-:f025ba1faae7
+:60ad60f7592c
+#!/usr/bin/python
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+#
+import atexit
+import collections
+import errno
+import tarfile as tf
+import pkg.pkggzip
+import pkg.pkgtarfile as ptf
+import os
+import pkg
+import pkg.client.api_errors as apx
+import pkg.client.publisher
+import pkg.fmri
+import pkg.misc
+import pkg.portable
+import pkg.p5i
+import shutil
+import tempfile
+import urllib
class ArchiveErrors(apx.ApiException):
    """Common base class for the errors raised by the archive classes in
    this module."""

    pass
class InvalidArchiveIndex(ArchiveErrors):
    """Raised when an archive index is in a format that this version of
    the pkg(5) ArchiveIndex class does not support or cannot recognize."""

    def __init__(self, arc_name):
        """'arc_name' is the pathname of the offending index file; it is
        only used to build the error message."""

        ArchiveErrors.__init__(self)
        self.__name = arc_name

    def __str__(self):
        template = _("%s is not in a supported or recognizable archive "
            "index format.")
        return template % self.__name
class ArchiveIndex(object):
    """Class representing a pkg(5) archive table of contents and a set of
    interfaces to populate and retrieve entries.

    Entries in this file are written in the following format:

        <name>NUL<offset>NUL<entry_size>NUL<size>NUL<typeflag>NULNL

    <name> is a string containing the pathname of the file in the
        archive.  It can be up to 65,535 bytes in length.

    <offset> is an unsigned long long integer containing the relative
        offset in bytes of the first header block for the file in the
        archive.  The offset is relative to the end of the last block of
        the first file in the archive.

    <entry_size> is an unsigned long long integer containing the size of
        the file's entry in bytes in the archive (including archive
        headers and trailers for the entry).

    <size> is an unsigned long long integer containing the size of the
        file in bytes in the archive.

    <typeflag> is a single character representing the type of the file
        in the archive.  Possible values are:
            0 Regular File
            1 Hard Link
            2 Symbolic Link
            5 Directory or subdirectory

    Instances can be used as context managers; the index is closed when
    the 'with' block is left.
    """

    version = None
    CURRENT_VERSION = 0
    COMPATIBLE_VERSIONS = 0,
    ENTRY_FORMAT = "%s\0%d\0%d\0%d\0%c\0\n"

    def __init__(self, name, mode="r", version=None):
        """Open a pkg(5) archive table of contents file.

        'name' should be the absolute path of the file to use when
        reading or writing index data.

        'mode' indicates whether the index is being used for reading
        or writing, and can be 'r' or 'w'.  Appending to or updating
        a table of contents file is not supported.

        'version' is an optional integer value specifying the version
        of the index to be read or written.  If not specified, the
        current version is assumed.

        Raises InvalidArchiveIndex if 'version' is not supported or the
        file cannot be opened as gzip data.
        """

        assert os.path.isabs(name)
        if version is None:
            version = self.CURRENT_VERSION
        if version not in self.COMPATIBLE_VERSIONS:
            raise InvalidArchiveIndex(name)

        self.__closed = False
        self.__name = name
        # Set up front so that close() remains safe to call even if
        # opening the file below fails.
        self.__file = None

        # The index is always accessed in binary mode; tolerate callers
        # that already included the 'b' flag rather than doubling it.
        if "b" not in mode:
            mode += "b"
        self.__mode = mode

        try:
            self.__file = pkg.pkggzip.PkgGzipFile(self.__name,
                self.__mode)
        except IOError as e:
            if e.errno:
                raise
            # Underlying gzip library raises this exception if the
            # file isn't a valid gzip file.  So, assume that if
            # errno isn't set, this is a gzip error instead.
            raise InvalidArchiveIndex(name)
        self.version = version

    def __enter__(self):
        """Context management support; returns the index itself."""
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Context handler that ensures the index is closed when the
        'with' block is left, whether or not an error occurred.
        """

        # The only resource held is the index file object, so closing
        # it is the right thing to do in both the error and non-error
        # cases.
        self.close()

    @property
    def pathname(self):
        """The absolute path of the archive index file."""
        return self.__name

    def add(self, name, offset, entry_size, size, typeflag):
        """Add an entry for the given archive file to the table of
        contents.  The arguments correspond to the fields described in
        the class documentation and are written using ENTRY_FORMAT."""

        self.__file.write(self.ENTRY_FORMAT % (name, offset, entry_size,
            size, typeflag))

    def offsets(self):
        """Returns a generator that yields tuples of the form (name,
        offset) for each file in the index.

        Raises InvalidArchiveIndex if an entry cannot be parsed or the
        underlying file is not valid gzip data.
        """

        self.__file.seek(0)

        # Lines belonging to the entry currently being assembled.  An
        # entry spans more than one line only when the file name itself
        # contains newlines.
        pending = []
        try:
            for line in self.__file:
                pending.append(line)
                if not line.endswith("\0\n"):
                    # Every entry is terminated by NUL NL; anything
                    # else means the name contained a newline and
                    # the entry continues on the next line.
                    continue

                # Each line retains its own newline, so the pieces
                # can simply be concatenated.
                entry = "".join(pending)
                pending = []
                name, offset, ignored = entry.split("\0", 2)
                # int() transparently yields a long on Python 2 for
                # values that need one.
                yield name, int(offset)
        except ValueError:
            raise InvalidArchiveIndex(self.__name)
        except IOError as e:
            if e.errno:
                raise
            # Underlying gzip library raises this exception if the
            # file isn't a valid gzip file.  So, assume that if
            # errno isn't set, this is a gzip error instead.
            raise InvalidArchiveIndex(self.__name)

    def close(self):
        """Close the index.  No further operations can be performed
        using this object once closed."""

        if self.__closed:
            return
        if self.__file:
            self.__file.close()
            self.__file = None
        self.__closed = True
class InvalidArchive(ArchiveErrors):
    """Raised when an archive is in a format that this version of the
    pkg(5) Archive class does not support or cannot recognize.
    """

    def __init__(self, arc_name):
        """'arc_name' is the pathname of the archive; it is kept on the
        exception and used in the error message."""

        ArchiveErrors.__init__(self)
        self.arc_name = arc_name

    def __str__(self):
        template = _("Archive %s is missing, unsupported, or corrupt.")
        return template % self.arc_name
class CorruptArchiveFiles(ArchiveErrors):
    """Raised when the archive entries for the requested file(s) are
    corrupt.
    """

    def __init__(self, arc_name, files):
        """'arc_name' is the pathname of the archive and 'files' is an
        iterable of the names of the affected archive members."""

        ArchiveErrors.__init__(self)
        self.arc_name = arc_name
        self.files = files

    def __str__(self):
        details = {
            "arc_name": self.arc_name,
            "files": "\n".join(self.files),
        }
        return _("Package archive %(arc_name)s contains corrupt "
            "entries for the requested package file(s):\n%(files)s.") % \
            details
class UnknownArchiveFiles(ArchiveErrors):
    """Raised when the requested file(s) cannot be found in the archive.
    """

    def __init__(self, arc_name, files):
        """'arc_name' is the pathname of the archive and 'files' is an
        iterable of the names that could not be found."""

        ArchiveErrors.__init__(self)
        self.arc_name = arc_name
        self.files = files

    def __str__(self):
        details = {
            "arc_name": self.arc_name,
            "files": "\n".join(self.files),
        }
        return _("Package archive %(arc_name)s does not contain the "
            "requested package file(s):\n%(files)s.") % details
class UnknownPackageManifest(ArchiveErrors):
    """Raised when the archive holds no manifest for the requested
    package.
    """

    def __init__(self, arc_name, pfmri):
        """'arc_name' is the pathname of the archive and 'pfmri'
        identifies the package whose manifest is missing."""

        ArchiveErrors.__init__(self)
        self.arc_name = arc_name
        self.pfmri = pfmri

    def __str__(self):
        details = {
            "pfmri": self.pfmri,
            "arc_name": self.arc_name,
        }
        return _("No package manifest for package '%(pfmri)s' exists "
            "in archive %(arc_name)s.") % details
+class Archive(object):
+"""Class representing a pkg(5) archive and a set of interfaces to
+populate it and retrieve data from it.
+This class stores package data in pax archives in version 4 repository
+format.  Encoding the structure of a repository into the archive is
+necessary to enable easy composition of package archive contents with
+existing repositories and to enable consumers to access the contents of
+a package archive the same as they would a repository.
+This class can be used to access or extract the contents of almost any
+tar archive, except for those that are compressed.
+"""
+__idx_pfx = "pkg5.index."
+__idx_sfx = ".gz"
+__idx_name = "pkg5.index.%s.gz"
+__idx_ver = ArchiveIndex.CURRENT_VERSION
+__index = None
+__arc_tfile = None
+__arc_file = None
+version = None
+# If the repository format changes, then the version of the package
+# archive format should be rev'd and this updated.  (Although that isn't
+# strictly necessary, as the Repository class should remain backwards
+# compatible with this format.)
+CURRENT_VERSION = 0
+COMPATIBLE_VERSIONS = (0,)
def __init__(self, pathname, mode="r"):
    """Open the pkg(5) archive at 'pathname' for reading or writing.

    'pathname' is the absolute path of the archive file to create
    or read from.

    'mode' is a string used to indicate whether the archive is being
    opened for reading or writing, which is indicated by 'r' and 'w'
    respectively.  An archive opened for writing may not be used for
    any extraction operations, and must not already exist.

    Raises InvalidArchive if the archive does not exist, is a
    directory, is empty, cannot be opened as a tar archive, declares
    an unsupported version, or has an unusable index.  Other
    environment errors are converted using apx._convert_error().
    """

    assert os.path.isabs(pathname)
    self.__arc_name = pathname
    self.__closed = False
    self.__mode = mode

    # Used to cache publisher objects.
    self.__pubs = None

    # Used to cache location of publisher catalog data.
    self.__catalogs = {}

    arc_mode = mode + "b"
    mode += ":"

    assert "r" in mode or "w" in mode
    assert "a" not in mode
    if "w" in mode:
        # Don't allow overwrite of existing archive.
        assert not os.path.exists(self.__arc_name)

    # Create the scratch directory only after the arguments have been
    # validated so that a failed assertion does not leave it behind.
    self.__temp_dir = tempfile.mkdtemp()

    try:
        self.__arc_file = open(self.__arc_name, arc_mode,
            128*1024)
    except EnvironmentError as e:
        # The exit-time cleanup handler has not been registered
        # yet, so the scratch directory must be removed here or it
        # would be leaked.
        shutil.rmtree(self.__temp_dir, True)
        if e.errno in (errno.ENOENT, errno.EISDIR):
            raise InvalidArchive(self.__arc_name)
        raise apx._convert_error(e)

    self.__queue_offset = 0
    self.__queue = collections.deque()

    # Ensure cleanup is performed on exit if the archive is not
    # explicitly closed.
    def arc_cleanup():
        if not self.__closed:
            self.__close_fh()
        self.__cleanup()
        return
    atexit.register(arc_cleanup)

    # Open the pax archive for the package.
    try:
        self.__arc_tfile = ptf.PkgTarFile.open(mode=mode,
            fileobj=self.__arc_file, format=tf.PAX_FORMAT)
    except EnvironmentError as e:
        raise apx._convert_error(e)
    except Exception:
        # Likely not an archive or the archive is corrupt.
        raise InvalidArchive(self.__arc_name)

    self.__extract_offsets = {}
    if "r" in mode:
        # Opening the tarfile loaded the first member, which
        # should be the archive index file.
        member = self.__arc_tfile.firstmember
        if not member:
            # Archive is empty.
            raise InvalidArchive(self.__arc_name)

        if not member.name.startswith(self.__idx_pfx) or \
            not member.name.endswith(self.__idx_sfx):
            # No index present; offsets will have to be found by
            # scanning the archive when they are first needed.
            return
        else:
            self.__idx_name = member.name

        comment = member.pax_headers.get("comment", "")
        if not comment.startswith("pkg5.archive.version."):
            return

        try:
            self.version = int(comment.rsplit(".", 1)[-1])
        except (IndexError, ValueError):
            raise InvalidArchive(self.__arc_name)

        if self.version not in self.COMPATIBLE_VERSIONS:
            raise InvalidArchive(self.__arc_name)

        # Create a temporary file to extract the index to,
        # and then extract it from the archive.
        fobj, idxfn = self.__mkstemp()
        fobj.close()
        try:
            self.__arc_tfile.extract_to(member,
                path=self.__temp_dir,
                filename=os.path.basename(idxfn))
        except tf.TarError:
            # Read error encountered.
            raise InvalidArchive(self.__arc_name)
        except EnvironmentError as e:
            raise apx._convert_error(e)

        # After extraction, the current archive file offset
        # is the base that will be used for all other
        # extractions.
        index_offset = self.__arc_tfile.offset

        # Load archive index.
        try:
            self.__index = ArchiveIndex(idxfn, mode="r",
                version=self.__idx_ver)
            for name, offset in self.__index.offsets():
                self.__extract_offsets[name] = \
                    index_offset + offset
        except InvalidArchiveIndex:
            # Index is corrupt; rather than driving on
            # and failing later, bail now.
            os.unlink(idxfn)
            raise InvalidArchive(self.__arc_name)
        except EnvironmentError as e:
            raise apx._convert_error(e)

    elif "w" in mode:
        self.__pubs = {}

        # Force normalization of archive member mode and
        # ownership information during archive creation.
        def gettarinfo(*args, **kwargs):
            ti = ptf.PkgTarFile.gettarinfo(self.__arc_tfile,
                *args, **kwargs)
            if ti.isreg():
                ti.mode = pkg.misc.PKG_FILE_MODE
            elif ti.isdir():
                ti.mode = pkg.misc.PKG_DIR_MODE
            if ti.name == "pkg5.index.0.gz":
                # The archive version is recorded as a pax
                # comment on the index member; this is what
                # the read path above looks for.
                ti.pax_headers["comment"] = \
                    "pkg5.archive.version.%d" % \
                    self.CURRENT_VERSION
            ti.uid = 0
            ti.gid = 0
            ti.uname = "root"
            ti.gname = "root"
            return ti
        self.__arc_tfile.gettarinfo = gettarinfo

        self.__idx_name = self.__idx_name % self.__idx_ver

        # Create a temporary file to write the index to,
        # and then create the index.  ArchiveIndex adds the binary
        # flag itself, so pass the plain mode rather than arc_mode
        # (which would result in a doubled 'b').
        fobj, idxfn = self.__mkstemp()
        fobj.close()
        self.__index = ArchiveIndex(idxfn, mode=self.__mode)

        # Used to determine what the default publisher will be
        # for the archive file at close().
        self.__default_pub = ""

        # Used to keep track of which package files have already
        # been added to archive.
        self.__processed_pfiles = set()

        # Always create archives using current version.
        self.version = self.CURRENT_VERSION

        # Always add base publisher directory to start; tarfile
        # requires an actual filesystem object to do this, so
        # re-use an existing directory to do so.
        self.add("/", arcname="publisher")
def __exit__(self, exc_type, exc_value, exc_tb):
    """Context handler used when leaving a 'with' block.  On success
    the archive is closed normally; if an error is being propagated,
    only the underlying file objects are closed.
    """

    failed = exc_type or exc_value or exc_tb
    if not failed:
        # Close and/or write out archive as needed.
        self.close()
        return

    # Only close file objects; don't actually write anything out in
    # an error condition.
    self.__close_fh()
def __find_extract_offsets(self):
    """Private helper that populates the member name to offset map used
    for individual member extraction, unless it is already populated.
    """

    offsets = self.__extract_offsets
    if offsets:
        # Already known; nothing to do.
        return

    # Without an index, the only way to learn the offsets is to have
    # tarfile read through the entire archive.
    try:
        offsets.update(
            (member.name, member.offset)
            for member in self.__arc_tfile.getmembers()
        )
    except tf.TarError:
        # Read error encountered.
        raise InvalidArchive(self.__arc_name)
    except EnvironmentError as e:
        raise apx._convert_error(e)
def __mkdtemp(self):
    """Create a scratch directory inside the archive's temporary
    directory and return its absolute path.  Environment errors are
    converted using apx._convert_error().
    """

    try:
        new_dir = tempfile.mkdtemp(dir=self.__temp_dir)
    except EnvironmentError as e:
        raise apx._convert_error(e)
    return new_dir
def __mkstemp(self):
    """Create a scratch file inside the archive's temporary directory.
    Returns a tuple of (file object opened for binary writing, absolute
    path).  Environment errors are converted using
    apx._convert_error().
    """

    try:
        handle, tmp_path = tempfile.mkstemp(dir=self.__temp_dir)
        tmp_file = os.fdopen(handle, "wb")
    except EnvironmentError as e:
        raise apx._convert_error(e)
    return tmp_file, tmp_path
def add(self, pathname, arcname=None):
    """Queue the specified object for addition to the archive.
    The archive will be created and the object added to it when the
    close() method is called.  The target object must not change
    after this method is called while the archive is open.  The
    item being added must not already exist in the archive.

    'pathname' is a string specifying the absolute path of a file
    to add to the archive.  The file may be a regular file,
    directory, symbolic link, or hard link.

    'arcname' is an optional string specifying an alternative name
    for the file in the archive.  If not given, the full pathname
    provided will be used.
    """

    assert not self.__closed and "w" in self.__mode

    tarobj = self.__arc_tfile
    info = tarobj.gettarinfo(pathname, arcname=arcname)
    header = info.tobuf(tarobj.format, tarobj.encoding, tarobj.errors)

    # The size of the entry is known before anything is written: the
    # header, plus the file data rounded up to whole tar blocks.
    data_blocks = (info.size + tf.BLOCKSIZE - 1) // tf.BLOCKSIZE
    entry_sz = len(header) + data_blocks * tf.BLOCKSIZE

    # Record name, offset, entry_size, size type for each file.
    self.__index.add(info.name, self.__queue_offset, entry_sz,
        info.size, info.type)
    self.__queue_offset += entry_sz
    self.__queue.append((pathname, info.name))

    # Discard tarinfo; it would be more efficient to keep these in
    # memory, but at a significant memory footprint cost.
    info.tarfile = None
    del info
def __add_publisher_files(self, root, file_dir, hashes, fpath=None,
    repo=None):
    """Private helper function for adding package files.

    'root' is an existing directory used as the template for any
    directory entries that need to be added.

    'file_dir' is the archive directory the files are stored under.

    'hashes' is an iterable of the hash names of the files to add.

    'fpath' is the directory containing the files named by hash; it is
    used when 'repo' is not provided.

    'repo' is an optional object whose file() method is used to locate
    each file instead.
    """

    seen = self.__processed_pfiles

    def ensure_dir(arc_dir):
        # Queue a directory entry unless one was queued before.
        if arc_dir not in seen:
            self.add(root, arcname=arc_dir)
            seen.add(arc_dir)

    ensure_dir(file_dir)

    for fhash in hashes:
        hash_dir = os.path.join(file_dir, fhash[:2])
        ensure_dir(hash_dir)

        hash_fname = os.path.join(hash_dir, fhash)
        if hash_fname in seen:
            # Already added for a different package.
            continue

        if repo:
            src = repo.file(fhash)
        else:
            src = os.path.join(fpath, fhash)
        self.add(src, arcname=hash_fname)

        # A bit expensive potentially in terms of memory usage,
        # but necessary to prevent duplicate archive entries.
        seen.add(hash_fname)
def __add_package(self, pfmri, mpath, fpath=None, repo=None):
    """Private helper function that queues a package for addition to
    the archive.

    'pfmri' is the FMRI object identifying the package.

    'mpath' is the absolute path of the package manifest file.

    'fpath' is an optional directory containing the package files
    stored by hash.

    'repo' is an optional Repository object to use to retrieve the
    data for the package to be added to the archive.

    Exactly one of 'fpath' or 'repo' must be provided.
    """

    assert not self.__closed and "w" in self.__mode
    assert mpath
    assert not (fpath and repo)
    assert fpath or repo

    # The first package queued determines the default publisher.
    if not self.__default_pub:
        self.__default_pub = pfmri.publisher

    manifest = pkg.manifest.Manifest(pfmri)
    manifest.set_content(pathname=mpath)

    # Throughout this function, the archive root directory is used
    # as a template to add other directories that should be present
    # in the archive.  This is necessary as the tarfile class does
    # not support adding arbitrary archive entries without a real
    # filesystem object as a source.
    root = os.path.dirname(self.__arc_name)
    seen = self.__processed_pfiles

    def ensure_dir(arc_dir):
        # Queue a directory entry unless one was queued before.
        if arc_dir not in seen:
            self.add(root, arcname=arc_dir)
            seen.add(arc_dir)

    pub_dir = os.path.join("publisher", pfmri.publisher)
    pkg_dir = os.path.join(pub_dir, "pkg")
    ensure_dir(pub_dir)
    ensure_dir(pkg_dir)

    # After manifest has been loaded, assume it's ok to queue the
    # manifest itself for addition to the archive; its directory
    # may need an entry first.
    arcname = os.path.join(pkg_dir, pfmri.get_dir_path())
    ensure_dir(os.path.dirname(arcname))
    self.add(mpath, arcname=arcname)

    # Now add any files to the archive for every action that has a
    # payload.  (That payload can consist of multiple files.)
    file_dir = os.path.join(pub_dir, "file")
    for action in manifest.gen_actions():
        if not (action.has_payload and action.hash):
            # Nothing to archive.
            continue

        payloads = set([action.hash])

        # Signature actions require special handling.
        if action.name == "signature":
            payloads.update(action.attrs.get("chain",
                "").split())

            if repo:
                # This bit of logic only possible if
                # package source is a repository.
                pub = self.__pubs.get(pfmri.publisher, None)
                if not pub:
                    pub = repo.get_publisher(
                        pfmri.publisher)
                    self.__pubs[pfmri.publisher] = pub
                    assert pub
                payloads.update(pub.signing_ca_certs)
                payloads.update(pub.intermediate_certs)

        self.__add_publisher_files(root, file_dir, payloads,
            fpath=fpath, repo=repo)
def add_package(self, pfmri, mpath, fpath):
    """Queues the specified package for addition to the archive.
    The archive will be created and the package added to it when
    the close() method is called.  The package contents must not
    change after this method is called while the archive is open.

    Please note that, for signed packages, signing certificates
    used by the publisher are not automatically added to the
    archive.

    'pfmri' is the FMRI string or object identifying the package to
    add.  It must include a publisher.

    'mpath' is the absolute path of the package manifest file.

    'fpath' is the directory containing the package files stored
    by hash.
    """

    assert pfmri and mpath and fpath

    # Accept either form of FMRI, but work with objects internally.
    fmri_obj = pfmri
    if isinstance(fmri_obj, basestring):
        fmri_obj = pkg.fmri.PkgFmri(fmri_obj)
    assert fmri_obj.publisher

    self.__add_package(fmri_obj, mpath, fpath=fpath)
def add_signing_certs(self, pub, hashes, ca):
    """Queues the specified publisher certs for addition to the
    archive. The archive will be created and the certs added to it
    when the close() method is called.  The cert contents must not
    change after this method is called while the archive is open.

    'pub' is the prefix of the publisher to store the package
    files for.

    'hashes' is the list of certificate hash files to store.
    (The certificate files must be in the same compressed format
    that the Repository class stores them in.)

    'ca' is a boolean indicating whether the certs are added as
    CA certificates or intermediate certificates.
    """

    root = os.path.dirname(self.__arc_name)
    file_dir = os.path.join("publisher", pub, "file")

    # Look up the cached publisher object, creating one on first use.
    pubobj = self.__pubs.get(pub, None)
    if not pubobj:
        pubobj = pkg.client.publisher.Publisher(pub)
        self.__pubs[pub] = pubobj

    for cert_path in hashes:
        cert_dir, cert_hash = os.path.split(cert_path)
        self.__add_publisher_files(root, file_dir, [cert_hash],
            fpath=cert_dir)

        # Record the cert against the publisher in the right list.
        if ca:
            pubobj.signing_ca_certs.append(cert_hash)
        else:
            pubobj.intermediate_certs.append(cert_hash)
def add_repo_package(self, pfmri, repo):
    """Queues the specified package in a repository for addition to
    the archive. The archive will be created and the package added
    to it when the close() method is called.  The package contents
    must not change after this method is called while the archive is
    open.

    'pfmri' is the FMRI string or object identifying the package to
    add.  It must include a publisher.

    'repo' is the Repository object to use to retrieve the data for
    the package to be added to the archive.
    """

    assert pfmri and repo

    # Accept either form of FMRI, but work with objects internally.
    fmri_obj = pfmri
    if isinstance(fmri_obj, basestring):
        fmri_obj = pkg.fmri.PkgFmri(fmri_obj)
    assert fmri_obj.publisher

    manifest_path = repo.manifest(fmri_obj)
    self.__add_package(fmri_obj, manifest_path, repo=repo)
def extract_catalog1(self, part, path, pub=None):
    """Extract the named v1 catalog part to the specified directory.

    'part' is the name of the catalog file part.

    'path' is the absolute path of the directory to extract the
    file to.  It will be created automatically if it does not
    exist.

    'pub' is an optional publisher prefix.  If not provided, the
    first publisher catalog found in the archive will be used.

    Raises UnknownArchiveFiles if the publisher or the requested
    part cannot be found.  Environment errors encountered while
    copying are converted using apx._convert_error().
    """

    # If the extraction index doesn't exist, scan the
    # complete archive and build one.
    self.__find_extract_offsets()

    pubs = [
        p for p in self.get_publishers()
        if not pub or p.prefix == pub
    ]
    if not pubs:
        raise UnknownArchiveFiles(self.__arc_name, [part])

    if not pub:
        # Default to first known publisher.
        pub = pubs[0].prefix

    # Expected locations in archive for various metadata.
    # A trailing slash is appended so that archive entry
    # comparisons skip the entries for the directory.
    pubpath = os.path.join("publisher", pub) + os.path.sep
    catpath = os.path.join(pubpath, "catalog") + os.path.sep
    partpath = os.path.join(catpath, part)

    if pub not in self.__catalogs:
        # First request for this publisher.  Record None if the
        # archive itself contains catalog data, or the directory
        # holding a catalog generated on demand otherwise.
        if any(
            name.startswith(catpath)
            for name in self.__extract_offsets
        ):
            self.__catalogs[pub] = None
        else:
            # No catalog data found for publisher; construct a
            # catalog based on packages found for publisher.
            cat = pkg.catalog.Catalog(batch_mode=True, sign=False)
            manpath = os.path.join(pubpath, "pkg") + os.path.sep
            for name in self.__extract_offsets:
                if not (name.startswith(manpath) and
                    name.count("/") == 4):
                    continue

                ignored, stem, ver = name.rsplit("/", 2)
                stem = urllib.unquote(stem)
                ver = urllib.unquote(ver)
                pfmri = pkg.fmri.PkgFmri("%s@%s" % (stem, ver),
                    publisher=pub)

                fobj = self.get_file(name)
                try:
                    content = fobj.read()
                finally:
                    # Don't hold on to the member file object
                    # longer than necessary.
                    fobj.close()
                m = pkg.manifest.Manifest(pfmri=pfmri)
                m.set_content(content=content, signatures=True)
                cat.add_package(pfmri, manifest=m)

            # Store catalog in a temporary directory and mark
            # publisher as having catalog data cached.
            croot = self.__mkdtemp()
            cat.meta_root = croot
            cat.batch_mode = False
            cat.finalize()
            cat.save()
            self.__catalogs[pub] = croot

    croot = self.__catalogs[pub]
    if not croot:
        # Catalog data is stored in the archive itself, so use the
        # default extraction logic.
        self.extract_to(partpath, path, filename=part)
        return

    # Catalog data was generated on demand, so just copy the
    # requested part from the cache to the destination.
    src = os.path.join(croot, part)
    if not os.path.exists(src):
        raise UnknownArchiveFiles(self.__arc_name, [partpath])

    try:
        # Honor the documented contract that the destination
        # directory is created when it does not exist yet.
        if not os.path.isdir(path):
            os.makedirs(path)
        pkg.portable.copyfile(src, os.path.join(path, part))
    except EnvironmentError as e:
        raise apx._convert_error(e)
def extract_package_files(self, hashes, path, pub=None):
    """Extract one or more package files from the archive.

    'hashes' is a list of the files to extract named by their hash.

    'path' is the absolute path of the directory to extract the
    files to.  It will be created automatically if it does not
    exist.

    'pub' is the prefix (name) of the publisher that the package
    files are associated with.  If not provided, the first file
    named after the given hash found in the archive will be used.
    (This will be noticeably slower depending on the size of the
    archive.)

    Raises UnknownArchiveFiles if any of the requested files cannot
    be found.
    """

    assert not self.__closed and "r" in self.__mode
    assert hashes

    # If the extraction index doesn't exist, scan the complete
    # archive and build one.
    self.__find_extract_offsets()

    if pub:
        # The location of each file is fully determined.
        for fhash in hashes:
            arcname = os.path.join("publisher", pub, "file",
                fhash[:2], fhash)
            self.extract_to(arcname, path, filename=fhash)
        return

    # Scan extract offsets index for the first instance of any
    # package file seen for each hash and extract the file as each
    # is found.  A member can only match the hash it is named after,
    # so a set lookup on the last path component avoids comparing
    # every member against every outstanding hash.
    remaining = set(hashes)
    for name in self.__extract_offsets:
        fhash = os.path.basename(name)
        if fhash not in remaining:
            continue
        if not name.endswith(os.path.join("file", fhash[:2],
            fhash)):
            # Named like a requested hash, but not stored where
            # package files are kept.
            continue

        self.extract_to(name, path, filename=fhash)
        remaining.discard(fhash)
        if not remaining:
            break

    if remaining:
        # Any remaining hashes are for package files that couldn't
        # be found.
        raise UnknownArchiveFiles(self.__arc_name, remaining)
def extract_package_manifest(self, pfmri, path, filename=""):
    """Extract a package manifest from the archive.

    'pfmri' is the FMRI string or object identifying the package
    manifest to extract.  It must include a publisher.

    'path' is the absolute path of the directory to extract the
    manifest to.  It will be created automatically if it does not
    exist.

    'filename' is an optional name to use for the extracted file.
    If not provided, the default behaviour is to create a directory
    named after the package stem in 'path' and a file named after
    the version in that directory; both components will be URI
    encoded.

    Raises UnknownPackageManifest if the archive has no manifest for
    the package.
    """

    assert not self.__closed and "r" in self.__mode
    assert pfmri and path

    # Accept either form of FMRI, but work with objects internally.
    fmri_obj = pfmri
    if isinstance(fmri_obj, basestring):
        fmri_obj = pkg.fmri.PkgFmri(fmri_obj)
    assert fmri_obj.publisher

    dir_path = fmri_obj.get_dir_path()
    target_name = filename or dir_path
    arcname = os.path.join("publisher", fmri_obj.publisher, "pkg",
        dir_path)

    try:
        self.extract_to(arcname, path, filename=target_name)
    except UnknownArchiveFiles:
        # Report the failure in terms of the package requested.
        raise UnknownPackageManifest(self.__arc_name, fmri_obj)
def extract_to(self, src, path, filename=""):
    """Extract a member from the archive.

    'src' is the pathname of the archive file to extract.

    'path' is the absolute path of the directory to extract the file
    to.

    'filename' is an optional string indicating the name to use for
    the extracted file.  If not provided, the full member name in
    the archive will be used.

    Raises UnknownArchiveFiles if the member cannot be found,
    InvalidArchive if the archive cannot be read or does not match
    its index, and CorruptArchiveFiles if the size of the extracted
    file does not match the size recorded in the archive.
    """

    assert not self.__closed and "r" in self.__mode

    tarobj = self.__arc_tfile
    offset = self.__extract_offsets.get(src, None)

    if offset is None:
        if self.__extract_offsets:
            # Assume there is no such archive member if extract
            # offsets are known, but the item can't be found.
            raise UnknownArchiveFiles(self.__arc_name, [src])

        # No archive index; fallback to retrieval by name.
        member = src
    else:
        # Position both the raw file and the tarfile object at the
        # start of the member's header so that it can be read.
        self.__arc_file.seek(offset)
        tarobj.offset = offset

        try:
            member = tf.TarInfo.fromtarfile(tarobj)
        except tf.TarError:
            # Read error encountered.
            raise InvalidArchive(self.__arc_name)
        except EnvironmentError as e:
            raise apx._convert_error(e)

        if member.name != src:
            # Index must be invalid or tarfile has gone off
            # the rails trying to read the archive.
            raise InvalidArchive(self.__arc_name)

    # Extract the file to the specified location.
    try:
        tarobj.extract_to(member, path=path, filename=filename)
    except KeyError:
        raise UnknownArchiveFiles(self.__arc_name, [src])
    except tf.TarError:
        # Read error encountered.
        raise InvalidArchive(self.__arc_name)
    except EnvironmentError as e:
        raise apx._convert_error(e)

    if not isinstance(member, tf.TarInfo):
        # Retrieved by name only, so there is no size information
        # available to validate against.
        return

    # Validate the size of the extracted object.
    try:
        dest = os.path.join(path, filename or member.name)
        if os.stat(dest).st_size != member.size:
            raise CorruptArchiveFiles(self.__arc_name, [src])
    except EnvironmentError as e:
        raise apx._convert_error(e)
def get_file(self, src):
    """Returns an archive member as a file object.  If the matching
    member is a regular file, a file-like object will be returned.
    If it is a link, a file-like object is constructed from the
    link's target.  In all other cases, None will be returned.  The
    file-like object is read-only and provides methods: read(),
    readline(), readlines(), seek() and tell().  The returned object
    must be closed before the archive is, and must not be used after
    the archive is closed.

    'src' is the pathname of the archive file to return.

    Raises UnknownArchiveFiles if the member cannot be found, and
    InvalidArchive if the member's header cannot be read or does not
    match the archive index.
    """

    assert not self.__closed and "r" in self.__mode

    # Get the offset in the archive for the given file, and then
    # seek to it.
    offset = self.__extract_offsets.get(src, None)
    tfile = self.__arc_tfile
    if offset is not None:
        # Prepare the tarfile object for extraction by telling
        # it where to look for the file.
        self.__arc_file.seek(offset)
        tfile.offset = offset

        # Get the tarinfo object needed to extract the file,
        # reporting failures the same way extract_to() does.
        try:
            member = tf.TarInfo.fromtarfile(tfile)
        except tf.TarError:
            # Read error encountered.
            raise InvalidArchive(self.__arc_name)
        except EnvironmentError as e:
            raise apx._convert_error(e)

        if member.name != src:
            # Index must be invalid or tarfile has gone off
            # the rails trying to read the archive.
            raise InvalidArchive(self.__arc_name)
    elif self.__extract_offsets:
        # Assume there is no such archive member if extract
        # offsets are known, but the item can't be found.
        raise UnknownArchiveFiles(self.__arc_name, [src])
    else:
        # No archive index; fallback to retrieval by name.
        member = src

    # Finally, return the object for the matching archive member.
    try:
        return tfile.extractfile(member)
    except KeyError:
        raise UnknownArchiveFiles(self.__arc_name, [src])
def get_package_file(self, fhash, pub=None):
    """Returns the first package file matching the given hash as a
    file-like object. The file-like object is read-only and provides
    methods: read(), readline(), readlines(), seek() and tell().
    The returned object must be closed before the archive is, and
    must not be used after the archive is closed.

    'fhash' is the hash name of the file to return.

    'pub' is the prefix (name) of the publisher that the package
    files are associated with.  If not provided, the first file
    named after the given hash found in the archive will be used.
    (This will be noticeably slower depending on the size of the
    archive.)

    Raises UnknownArchiveFiles if no matching file can be found.
    """

    assert not self.__closed and "r" in self.__mode

    # If the extraction index doesn't exist, scan the complete
    # archive and build one.  (The helper does nothing when the
    # offsets are already known.)
    self.__find_extract_offsets()

    if pub:
        # The location of the file is fully determined.
        arcname = os.path.join("publisher", pub, "file", fhash[:2],
            fhash)
        return self.get_file(arcname)

    # Scan extract offsets index for the first instance of any
    # package file seen for the hash and return it.
    suffix = os.path.join("file", fhash[:2], fhash)
    for candidate in self.__extract_offsets:
        if candidate.endswith(suffix):
            return self.get_file(candidate)
    raise UnknownArchiveFiles(self.__arc_name, [fhash])
def get_package_manifest(self, pfmri, raw=False):
    """Returns a package manifest from the archive.

    'pfmri' is the FMRI string or object identifying the package
    manifest to extract.  It must include a publisher.

    'raw' is an optional boolean indicating whether the raw
    content of the Manifest should be returned.  If True, a
    file-like object containing the content of the manifest will
    be returned; the caller is then responsible for closing it
    before the archive is closed.  If False, a Manifest object
    will be returned.

    Raises UnknownPackageManifest if the archive has no manifest
    for the given package.
    """
    # pkg.manifest is not among the imports visible at the top of
    # this file; import it here so that this method does not depend
    # on another module having loaded it first.  This must stay the
    # first statement because it binds 'pkg' locally.
    import pkg.manifest

    assert not self.__closed and "r" in self.__mode
    assert pfmri
    if isinstance(pfmri, basestring):
        pfmri = pkg.fmri.PkgFmri(pfmri)
    assert pfmri.publisher

    arcname = os.path.join("publisher", pfmri.publisher, "pkg",
        pfmri.get_dir_path())
    try:
        fobj = self.get_file(arcname)
    except UnknownArchiveFiles:
        raise UnknownPackageManifest(self.__arc_name, pfmri)

    if raw:
        # Ownership of the file object passes to the caller.
        return fobj

    # The file object is only needed long enough to load the
    # manifest; always release it, even if loading fails, since
    # objects returned by get_file() must be closed before the
    # archive is.
    try:
        m = pkg.manifest.Manifest(pfmri=pfmri)
        m.set_content(content=fobj.read(), signatures=True)
    finally:
        fobj.close()
    return m
def get_publishers(self):
    """Return a list of publisher objects for all publishers used
    in the archive.

    The result is cached: once any publishers are known, later calls
    return the cached objects without consulting the archive again.
    """
    if self.__pubs:
        return self.__pubs.values()

    # Make sure the extraction index is available before searching
    # it.  NOTE(review): this is called unconditionally, so it
    # presumably returns early when the index already exists --
    # confirm against __find_extract_offsets().
    self.__find_extract_offsets()

    # Search through the offset index to find publishers in use.
    # Only entries of the form "publisher/<prefix>" are of interest.
    self.__pubs = {}
    for name in self.__extract_offsets:
        if name.count("/") != 1 or \
            not name.startswith("publisher/"):
            continue

        pfx = name.split("/", 1)[1]

        # See if this publisher has a .p5i file in the archive
        # (needed for signed packages).
        p5iname = os.path.join("publisher", pfx, "pub.p5i")
        try:
            fobj = self.get_file(p5iname)
        except UnknownArchiveFiles:
            # No p5i; that's ok.
            pub = pkg.client.publisher.Publisher(pfx)
        else:
            # Objects returned by get_file() must be closed
            # before the archive is, so release this one as
            # soon as it has been parsed.
            try:
                pubs = pkg.p5i.parse(fileobj=fobj)
            finally:
                fobj.close()
            assert len(pubs) == 1
            pub = pubs[0][0]
            assert pub

        self.__pubs[pfx] = pub

    return self.__pubs.values()
def __cleanup(self):
    """Private helper that removes the archive's temporary directory,
    along with everything in it, if the directory exists.

    Any EnvironmentError encountered is re-raised after conversion
    by apx._convert_error().
    """
    tmpdir = self.__temp_dir
    try:
        if not os.path.exists(tmpdir):
            # Nothing to remove.
            return
        shutil.rmtree(tmpdir)
    except EnvironmentError as err:
        raise apx._convert_error(err)
def __close_fh(self):
    """Private helper that closes whichever of the archive's handles
    are set (index, tarfile object, underlying file) and then marks
    the archive as closed."""

    # Not every archive has an index.
    index = self.__index
    if index:
        index.close()
        self.__index = None

    # The two handles below may never have been set if a read error
    # occurred while the archive was being loaded.
    tfile = self.__arc_tfile
    if tfile:
        tfile.close()
        self.__arc_tfile = None

    arc_file = self.__arc_file
    if arc_file:
        arc_file.close()
        self.__arc_file = None

    self.__closed = True
def close(self, progtrack=None):
    """Finish with the archive.  In 'r' mode the archive file is
    simply closed.  In 'w' mode every queued file is written to the
    archive first, and then it is closed.  No further operations on
    the archive are possible after calling this function.

    'progtrack' is an optional progress tracker; if provided, its
    archive_set_goal(), archive_add_progress() and archive_done()
    methods are called as the archive is written.
    """
    assert not self.__closed

    if "w" not in self.__mode:
        # Read-only archive: release the handles and remove any
        # temporary files.
        self.__close_fh()
        self.__cleanup()
        return

    # The standard pkg5.repository file has to be queued before
    # the index is closed.
    cfg_fobj, cfg_path = self.__mkstemp()
    repo_cfg = ("[CONFIGURATION]\nversion = 4\n\n"
        "[publisher]\nprefix = %s\n\n"
        "[repository]\nversion = 4\n" % self.__default_pub)
    cfg_fobj.write(repo_cfg)
    cfg_fobj.close()
    self.add(cfg_path, arcname="pkg5.repository")

    # Cached publisher objects mean signed packages are present;
    # p5i information for each publisher must then be included so
    # that the client can handle signing ca and intermediate certs.
    for pub in self.__pubs.values():
        # Copy only the information the archive needs into a new
        # publisher object.
        npub = pkg.client.publisher.Publisher(pub.prefix,
            alias=pub.alias, ca_certs=pub.signing_ca_certs,
            intermediate_certs=pub.intermediate_certs,
            revoked_ca_certs=pub.revoked_ca_certs,
            approved_ca_certs=pub.approved_ca_certs)

        # Write the p5i file and queue it for addition.
        p5i_fobj, p5i_path = self.__mkstemp()
        pkg.p5i.write(p5i_fobj, [npub])
        p5i_fobj.close()
        self.add(p5i_path,
            arcname=os.path.join("publisher", npub.prefix, "pub.p5i"))

    # From here on no more entries can be added to the index.
    index = self.__index
    index.close()

    # With a tracker, the goal is every queued file plus the index
    # file itself.
    idxbytes = 0
    if progtrack:
        try:
            idxbytes = os.stat(index.pathname).st_size
        except EnvironmentError as err:
            raise apx._convert_error(err)
        nfiles = len(self.__queue) + 1
        nbytes = self.__queue_offset + idxbytes
        progtrack.archive_set_goal(
            os.path.basename(self.__arc_name), nfiles, nbytes)

    # The index file goes into the archive ahead of all queued
    # files.  (Per the original note, it is automatically marked
    # with a comment identifying the index version.)
    tfile = self.__arc_tfile
    tfile.add(index.pathname, arcname=self.__idx_name)
    if progtrack:
        progtrack.archive_add_progress(1, idxbytes)
    self.__index = None

    # Now write out everything that was queued.
    queue = self.__queue
    while queue:
        src, arcname = queue.popleft()
        before = tfile.offset
        tfile.add(src, arcname=arcname, recursive=False)

        # tarfile remembers member information for each item it
        # adds; that cache isn't needed here and uses a
        # significant amount of memory, so drop the entry again.
        # The change in offset since before the add provides the
        # byte count for the progress update.
        ti = tfile.members.pop()
        if progtrack:
            progtrack.archive_add_progress(1, tfile.offset - before)
        ti.tarfile = None
        del ti

    # Remove temporary files.
    self.__cleanup()

    # Archive created; success!
    if progtrack:
        progtrack.archive_done()
    self.__close_fh()
@property
def pathname(self):
    """Read-only property giving the absolute path of the archive
    file."""
    arc_path = self.__arc_name
    return arc_path

changeset 2219	60ad60f7592c
child 2286	938fbb350ad2