13241 pkg install triggers system incorporation update
13968 WOS_PKGS path in distro-import/Makefile doesn't exist after build 130
#!/usr/bin/python
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
"""Interfaces and implementation for the Catalog object, as well as functions
that operate on lists of package FMRIs."""
import copy
import calendar
import datetime
import errno
import hashlib
import os
if not hasattr(os, "SEEK_SET"):
    # Some versions of python don't provide the os.SEEK_* constants, so
    # define them here (as 0, 1 and 2 respectively) for the seek() calls
    # made later in this module.
    os.SEEK_SET, os.SEEK_CUR, os.SEEK_END = range(3)
import simplejson as json
import stat
import statvfs
import threading
import pkg.actions
import pkg.client.api_errors as api_errors
import pkg.fmri as fmri
import pkg.misc as misc
import pkg.portable as portable
import pkg.version
from operator import itemgetter
from pkg.misc import EmptyI
class _JSONWriter(object):
"""Private helper class used to serialize catalog data and generate
signatures."""
def __init__(self, data, pathname=None, sign=True):
self.__data = data
self.__fileobj = None
# Default to a 32K buffer.
self.__bufsz = 32 * 1024
if sign:
if not pathname:
# Only needed if not writing to __fileobj.
self.__sha_1 = hashlib.sha1()
self.__sha_1_value = None
self.__sign = sign
self.pathname = pathname
if not pathname:
return
# Call statvfs to find optimal blocksize for destination.
dest_dir = os.path.dirname(self.pathname)
try:
destvfs = os.statvfs(dest_dir)
# Set the file buffer size to the blocksize of our
# filesystem.
self.__bufsz = destvfs[statvfs.F_BSIZE]
except EnvironmentError, e:
if e.errno == errno.EACCES:
raise api_errors.PermissionsException(
e.filename)
except AttributeError, e:
# os.statvfs is not available on some platforms.
pass
try:
tfile = open(pathname, "wb", self.__bufsz)
except EnvironmentError, e:
if e.errno == errno.EACCES:
raise api_errors.PermissionsException(
e.filename)
if e.errno == errno.EROFS:
raise api_errors.ReadOnlyFileSystemException(
e.filename)
raise
self.__fileobj = tfile
def signatures(self):
"""Returns a dictionary mapping digest algorithms to the
hex-encoded digest values of the text of the catalog."""
if not self.__sign:
return {}
return { "sha-1": self.__sha_1_value }
def save(self):
"""Serializes and stores the provided data in JSON format."""
# sort_keys is necessary to ensure consistent signature
# generation. It has a minimal performance cost as well (on
# on SPARC and x86), so shouldn't be an issue. However, it
# is only needed if the caller has indicated that the content
# should be signed.
# Whenever possible, avoid using the write wrapper (self) as
# this can greatly increase write times.
out = self.__fileobj
if not out:
out = self
json.dump(self.__data, out, check_circular=False,
separators=(",", ":"), sort_keys=self.__sign)
out.write("\n")
if self.__fileobj:
self.__fileobj.close()
if not self.__sign or not self.__fileobj:
# Can't sign unless a file object is provided. And if
# one is provided, but no signing is to be done, then
# ensure the fileobject is discarded.
self.__fileobj = None
if self.__sign:
self.__sha_1_value = self.__sha_1.hexdigest()
return
# Ensure file object goes out of scope.
self.__fileobj = None
# Calculating sha-1 this way is much faster than intercepting
# write calls because of the excessive number of write calls
# that json.dump() triggers (1M+ for /dev catalog files).
self.__sha_1_value = misc.get_data_digest(self.pathname)[0]
# Open the JSON file so that the signature data can be added.
sfile = file(self.pathname, "rb+", self.__bufsz)
# The last bytes should be "}\n", which is where the signature
# data structure needs to be appended.
sfile.seek(-2, os.SEEK_END)
# Add the signature data and close.
sfile.write(',"_SIGNATURE":')
json.dump(self.signatures(), sfile, check_circular=False,
separators=(",", ":"))
sfile.write("}\n")
sfile.close()
def write(self, data):
"""Wrapper function that should not be called by external
consumers."""
if self.__sign:
self.__sha_1.update(data)
class CatalogPartBase(object):
"""A CatalogPartBase object is an abstract class containing core
functionality shared between CatalogPart and CatalogAttrs."""
# The file mode to be used for all catalog files.
__file_mode = stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IROTH
__meta_root = None
last_modified = None
loaded = False
name = None
sign = True
signatures = None
def __init__(self, name, meta_root=None, sign=True):
"""Initializes a CatalogPartBase object."""
self.meta_root = meta_root
self.name = name
self.sign = sign
self.signatures = {}
if not self.meta_root or not self.exists:
# Operations shouldn't attempt to load the part data
# unless meta_root is defined and the data exists.
self.loaded = True
self.last_modified = datetime.datetime.utcnow()
else:
self.last_modified = self.__last_modified()
@staticmethod
def _gen_signatures(data):
f = _JSONWriter(data)
f.save()
return f.signatures()
def __get_meta_root(self):
return self.__meta_root
def __last_modified(self):
"""A UTC datetime object representing the time the file used to
to store object metadata was modified, or None if it does not
exist yet."""
if not self.exists:
return None
try:
mod_time = os.stat(self.pathname).st_mtime
except EnvironmentError, e:
if e.errno == errno.ENOENT:
return None
raise
return datetime.datetime.utcfromtimestamp(mod_time)
def __set_meta_root(self, path):
if path:
path = os.path.abspath(path)
self.__meta_root = path
def destroy(self):
"""Removes any on-disk files that exist for the catalog part and
discards all content."""
if self.pathname:
if os.path.exists(self.pathname):
try:
portable.remove(self.pathname)
except EnvironmentError, e:
if e.errno == errno.EACCES:
raise api_errors.PermissionsException(
e.filename)
if e.errno == errno.EROFS:
raise api_errors.ReadOnlyFileSystemException(
e.filename)
raise
self.signatures = {}
self.loaded = False
self.last_modified = None
@property
def exists(self):
"""A boolean value indicating wheher a file for the catalog part
exists at <self.meta_root>/<self.name>."""
if not self.pathname:
return False
return os.path.exists(self.pathname)
def load(self):
"""Load the serialized data for the catalog part and return the
resulting structure."""
location = os.path.join(self.meta_root, self.name)
try:
fobj = file(location, "rb")
except EnvironmentError, e:
if e.errno == errno.ENOENT:
raise api_errors.RetrievalError(e,
location=location)
if e.errno == errno.EROFS:
raise api_errors.ReadOnlyFileSystemException(
e.filename)
if e.errno == errno.EACCES:
raise api_errors.PermissionsException(
e.filename)
raise
try:
struct = json.load(fobj)
except EnvironmentError, e:
raise api_errors.RetrievalError(e)
except ValueError, e:
# Not a valid catalog file.
raise api_errors.InvalidCatalogFile(location)
self.loaded = True
# Signature data, if present, should be removed from the struct
# on load and then stored in the signatures object property.
self.signatures = struct.pop("_SIGNATURE", {})
return struct
@property
def pathname(self):
"""The absolute path of the file used to store the data for
this part or None if meta_root or name is not set."""
if not self.meta_root or not self.name:
return None
return os.path.join(self.meta_root, self.name)
def save(self, data):
"""Serialize and store the transformed catalog part's 'data' in
a file using the pathname <self.meta_root>/<self.name>.
'data' must be a dict."""
f = _JSONWriter(data, pathname=self.pathname, sign=self.sign)
f.save()
# Update in-memory copy to reflect stored data.
self.signatures = f.signatures()
# Ensure the permissions on the new file are correct.
try:
os.chmod(self.pathname, self.__file_mode)
except EnvironmentError, e:
if e.errno == errno.EACCES:
raise api_errors.PermissionsException(
e.filename)
if e.errno == errno.EROFS:
raise api_errors.ReadOnlyFileSystemException(
e.filename)
raise
# Finally, set the file times to match the last catalog change.
if self.last_modified:
mtime = calendar.timegm(
self.last_modified.utctimetuple())
os.utime(self.pathname, (mtime, mtime))
meta_root = property(__get_meta_root, __set_meta_root)
class CatalogPart(CatalogPartBase):
    """A CatalogPart object is the representation of a subset of the
    package FMRIs available from a package repository.

    Entries are kept in a nested structure of the form
    {publisher: {stem: [entry, ...]}}, where each entry is a dict with
    at least a "version" key."""

    __data = None
    ordered = None

    def __init__(self, name, meta_root=None, ordered=True, sign=True):
        """Initializes a CatalogPart object.

        'ordered' is a boolean indicating whether the version entries
        of a package should be re-sorted every time add() is called."""

        self.__data = {}
        self.ordered = ordered
        CatalogPartBase.__init__(self, name, meta_root=meta_root,
            sign=sign)

    @staticmethod
    def __entry_order(a, b):
        """Comparison function that orders catalog entries by their
        version."""

        # XXX version requires build string; 5.11 is not sane.
        v1 = pkg.version.Version(a["version"], "5.11")
        v2 = pkg.version.Version(b["version"], "5.11")
        return cmp(v1, v2)

    def __iter_entries(self, last=False, ordered=False, pubs=EmptyI):
        """Private function returning an iterator of (pub, stem, entry)
        tuples for the catalog entries.

        'last' is a boolean value that indicates only the last entry
        for each package on a per-publisher basis should be returned.
        As long as the CatalogPart has been saved since the last
        modifying operation, or sort() has been called, this will
        also be the newest version of the package.

        'ordered' is an optional boolean value that indicates that
        results should be sorted by stem and then by publisher and
        be in descending version order.  If False, results will be
        in ascending version order on a per-publisher, per-stem
        basis.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to."""

        self.load()
        if ordered:
            stems = self.pkg_names(pubs=pubs)
        else:
            stems = (
                (pub, stem)
                for pub in self.publishers(pubs=pubs)
                for stem in self.__data[pub]
            )

        if last:
            return (
                (pub, stem, self.__data[pub][stem][-1])
                for pub, stem in stems
            )

        # Version lists are stored in ascending order, so a descending
        # walk is simply a reversed one.
        if ordered:
            walk = reversed
        else:
            walk = iter
        return (
            (pub, stem, entry)
            for pub, stem in stems
            for entry in walk(self.__data[pub][stem])
        )

    def add(self, pfmri=None, metadata=None, op_time=None, pub=None,
        stem=None, ver=None):
        """Add a catalog entry for a given FMRI or FMRI components.

        Either 'pfmri' or all of 'pub', 'stem' and 'ver' must be
        provided.

        'metadata' is an optional dict containing the catalog
        metadata that should be stored for the specified FMRI.  If
        provided, it is used as the entry itself (and so gains a
        "version" key).

        'op_time' is an optional datetime to record as the
        last_modified time of the part; it defaults to the current
        UTC time.

        The dict representing the entry is returned to callers,
        but should not be modified.

        Raises api_errors.AnarchicalCatalogFMRI if 'pfmri' has no
        publisher, and api_errors.DuplicateCatalogEntry if an entry
        for the version already exists."""

        assert pfmri or (pub and stem and ver)
        if pfmri and not pfmri.publisher:
            raise api_errors.AnarchicalCatalogFMRI(str(pfmri))

        if not self.loaded:
            # Hot path, so avoid calling load unless necessary, even
            # though it performs this check already.
            self.load()

        if pfmri:
            pub, stem, ver = pfmri.tuple()
            ver = str(ver)

        ver_list = self.__data.setdefault(pub, {}).setdefault(stem, [])
        for existing in ver_list:
            if existing["version"] != ver:
                continue
            dup = pfmri
            if not dup:
                dup = "pkg://%s/%s@%s" % (pub, stem, ver)
            raise api_errors.DuplicateCatalogEntry(dup,
                operation="add", catalog_name=self.pathname)

        if metadata is not None:
            entry = metadata
        else:
            entry = {}
        entry["version"] = ver

        ver_list.append(entry)
        if self.ordered:
            if pfmri:
                self.sort(pfmris=set([pfmri]))
            else:
                # sort() can only locate entries through FMRI
                # objects, and there isn't one when the caller
                # supplied FMRI components; sort the affected
                # version list directly instead.
                ver_list.sort(cmp=self.__entry_order)

        if not op_time:
            op_time = datetime.datetime.utcnow()
        self.last_modified = op_time
        self.signatures = {}
        return entry

    def destroy(self):
        """Removes any on-disk files that exist for the catalog part
        and discards all content."""

        self.__data = {}
        return CatalogPartBase.destroy(self)

    def entries(self, cb=None, last=False, ordered=False, pubs=EmptyI):
        """A generator function that produces tuples of the form
        (fmri, entry) as it iterates over the contents of the catalog
        part (where entry is the related catalog entry for the fmri).
        Callers should not modify any of the data that is returned.

        'cb' is an optional callback function that will be executed
        for each package.  It must accept two arguments: 'pkg' and
        'entry'.  'pkg' is an FMRI object and 'entry' is the
        dictionary structure of the catalog entry for the package.
        If the callback returns a false value, then the entry will
        not be included in the results.

        'last' is a boolean value that indicates only the last entry
        for each package on a per-publisher basis should be returned.
        As long as the CatalogPart has been saved since the last
        modifying operation, or sort() has been called, this will
        also be the newest version of the package.

        'ordered' is an optional boolean value that indicates that
        results should be sorted by stem and then by publisher and
        be in descending version order.  If False, results will be
        in ascending version order on a per-publisher, per-stem
        basis.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to.

        Results are always in catalog version order on a per-
        publisher, per-stem basis."""

        for pub, stem, entry in self.__iter_entries(last=last,
            ordered=ordered, pubs=pubs):
            f = fmri.PkgFmri("%s@%s" % (stem, entry["version"]),
                publisher=pub)
            if cb is None or cb(f, entry):
                yield f, entry

    def entries_by_version(self, name, pubs=EmptyI):
        """A generator function that produces tuples of (version,
        entries) in ascending version order, where entries is a list
        of tuples of the format (fmri, entry) and entry is the
        catalog entry for the FMRI.

        'name' is the stem of the package to produce results for.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to."""

        self.load()

        versions = {}
        entries = {}
        for pub in self.publishers(pubs=pubs):
            for entry in self.__data[pub].get(name, ()):
                sver = entry["version"]
                pfmri = fmri.PkgFmri("%s@%s" % (name, sver),
                    publisher=pub)
                # Group by version string; keep the version
                # object for sorting the groups below.
                versions[sver] = pfmri.version
                entries.setdefault(sver, []).append(
                    (pfmri, entry))

        for key, ver in sorted(versions.iteritems(),
            key=itemgetter(1)):
            yield ver, entries[key]

    def fmris(self, last=False, objects=True, ordered=False,
        pubs=EmptyI):
        """A generator function that produces FMRIs as it iterates
        over the contents of the catalog part.

        'last' is a boolean value that indicates only the last fmri
        for each package on a per-publisher basis should be returned.
        As long as the CatalogPart has been saved since the last
        modifying operation, or sort() has been called, this will
        also be the newest version of the package.

        'objects' is an optional boolean value indicating whether
        FMRIs should be returned as FMRI objects or as strings.

        'ordered' is an optional boolean value that indicates that
        results should be sorted by stem and then by publisher and
        be in descending version order.  If False, results will be
        in ascending version order on a per-publisher, per-stem
        basis.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to.

        Results are always in catalog version order on a per-
        publisher, per-stem basis."""

        results = self.__iter_entries(last=last, ordered=ordered,
            pubs=pubs)
        if objects:
            for pub, stem, entry in results:
                yield fmri.PkgFmri("%s@%s" % (stem,
                    entry["version"]), publisher=pub)
        else:
            for pub, stem, entry in results:
                yield "pkg://%s/%s@%s" % (pub, stem,
                    entry["version"])

    def fmris_by_version(self, name, pubs=EmptyI):
        """A generator function that produces tuples of (version,
        fmris) in ascending version order, where fmris is a list of
        the fmris related to the version.

        'name' is the stem of the package to produce results for.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to."""

        self.load()

        versions = {}
        entries = {}
        for pub in self.publishers(pubs=pubs):
            for entry in self.__data[pub].get(name, ()):
                sver = entry["version"]
                pfmri = fmri.PkgFmri("%s@%s" % (name, sver),
                    publisher=pub)
                versions[sver] = pfmri.version
                entries.setdefault(sver, []).append(pfmri)

        for key, ver in sorted(versions.iteritems(),
            key=itemgetter(1)):
            yield ver, entries[key]

    def get_entry(self, pfmri=None, pub=None, stem=None, ver=None):
        """Returns the catalog part entry for the given package FMRI
        or FMRI components, or None if there isn't one.

        Raises api_errors.AnarchicalCatalogFMRI if 'pfmri' has no
        publisher."""

        assert pfmri or (pub and stem and ver)
        if pfmri and not pfmri.publisher:
            raise api_errors.AnarchicalCatalogFMRI(str(pfmri))

        # Since this is a hot path, this function checks for loaded
        # status before attempting to call the load function.
        if not self.loaded:
            self.load()

        if pfmri:
            pub, stem, ver = pfmri.tuple()
            ver = str(ver)

        pkg_list = self.__data.get(pub, None)
        if not pkg_list:
            return None

        for entry in pkg_list.get(stem, ()):
            if entry["version"] == ver:
                return entry
        return None

    def get_package_counts(self):
        """Returns a tuple of integer values (package_count,
        package_version_count).  The first is the number of
        unique packages (per-publisher), and the second is the
        number of unique package versions (per-publisher and
        stem)."""

        self.load()
        package_count = 0
        package_version_count = 0
        for pub in self.publishers():
            stems = self.__data[pub]
            package_count += len(stems)
            for stem in stems:
                package_version_count += len(stems[stem])
        return (package_count, package_version_count)

    def load(self):
        """Load and transform the catalog part's data, preparing it
        for use."""

        if self.loaded:
            # Already loaded, or only in-memory.
            return
        self.__data = CatalogPartBase.load(self)

    def names(self, pubs=EmptyI):
        """Returns a set containing the names of all the packages in
        the CatalogPart.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to."""

        self.load()
        found = set()
        for pub in self.publishers(pubs=pubs):
            found.update(self.__data[pub])
        return found

    def pkg_names(self, pubs=EmptyI):
        """A generator function that produces package tuples of the
        form (pub, stem) as it iterates over the contents of the
        CatalogPart.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to.

        Results are always returned sorted by stem and then by
        publisher."""

        self.load()

        # Results have to be sorted by stem first, and by publisher
        # prefix second, so build a sortable "stem!pub" key for each.
        keyed = []
        for pub in self.publishers(pubs=pubs):
            for stem in self.__data[pub]:
                keyed.append("%s!%s" % (stem, pub))
        keyed.sort()

        for item in keyed:
            stem, pub = item.split("!", 1)
            yield pub, stem

    def publishers(self, pubs=EmptyI):
        """A generator function that returns publisher prefixes as it
        iterates over the package data in the CatalogPart.

        'pubs' is an optional list that contains the prefixes of the
        publishers to restrict the results to."""

        self.load()
        for pub in self.__data:
            # Any entries starting with "_" are part of the
            # reserved catalog namespace.
            if pub[0] == "_":
                continue
            if pubs and pub not in pubs:
                continue
            yield pub

    def remove(self, pfmri, op_time=None):
        """Remove a package and its metadata.

        'op_time' is an optional datetime to record as the
        last_modified time of the part; it defaults to the current
        UTC time.

        Raises api_errors.AnarchicalCatalogFMRI if 'pfmri' has no
        publisher and api_errors.UnknownCatalogEntry if there is no
        entry for it."""

        if not pfmri.publisher:
            raise api_errors.AnarchicalCatalogFMRI(pfmri.get_fmri())

        self.load()
        pkg_list = self.__data.get(pfmri.publisher, None)
        if not pkg_list:
            raise api_errors.UnknownCatalogEntry(pfmri.get_fmri())

        ver = str(pfmri.version)
        ver_list = pkg_list.get(pfmri.pkg_name, [])
        for idx, entry in enumerate(ver_list):
            if entry["version"] == ver:
                break
        else:
            raise api_errors.UnknownCatalogEntry(pfmri.get_fmri())

        del ver_list[idx]
        if not ver_list:
            # When all version entries for a package are removed,
            # its stem should be also.
            del pkg_list[pfmri.pkg_name]
        if not pkg_list:
            # When all package stems for a publisher have been
            # removed, it should be also.
            del self.__data[pfmri.publisher]

        if not op_time:
            op_time = datetime.datetime.utcnow()
        self.last_modified = op_time
        self.signatures = {}

    def save(self):
        """Transform and store the catalog part's data in a file using
        the pathname <self.meta_root>/<self.name>."""

        if not self.meta_root:
            # Assume this is in-memory only.
            return
        CatalogPartBase.save(self, self.__data)

    def sort(self, pfmris=None, pubs=None):
        """Re-sorts the contents of the CatalogPart such that version
        entries for each package stem are in ascending order.

        'pfmris' is an optional set of FMRIs to restrict the sort to.
        This is useful during catalog operations as only entries for
        the corresponding package stem(s) need to be sorted.

        'pubs' is an optional set of publisher prefixes to restrict
        the sort to.  This is useful during catalog operations as only
        entries for the corresponding publisher stem(s) need to be
        sorted.  This option has no effect if 'pfmris' is also
        provided.

        If neither 'pfmris' nor 'pubs' is provided, all entries will
        be sorted."""

        self.load()
        if pfmris is not None:
            processed = set()
            for f in pfmris:
                pkg_stem = f.get_pkg_stem()
                if pkg_stem in processed:
                    continue
                processed.add(pkg_stem)

                # The specified FMRI may not exist in this
                # CatalogPart, so continue if it does not
                # exist.
                pkg_list = self.__data.get(f.publisher, None)
                if not pkg_list:
                    continue
                ver_list = pkg_list.get(f.pkg_name, None)
                if ver_list:
                    ver_list.sort(cmp=self.__entry_order)
            return

        for pub in self.publishers(pubs=pubs):
            for stem in self.__data[pub]:
                self.__data[pub][stem].sort(
                    cmp=self.__entry_order)

    def tuples(self, last=False, ordered=False, pubs=EmptyI):
        """Returns an iterator that produces FMRI tuples of the form
        (pub, stem, version) as it iterates over the contents of the
        catalog part.

        'last' is a boolean value that indicates only the last FMRI
        tuple for each package on a per-publisher basis should be
        returned.  As long as the CatalogPart has been saved since
        the last modifying operation, or sort() has been called,
        this will also be the newest version of the package.

        'ordered' is an optional boolean value that indicates that
        results should be sorted by stem and then by publisher and
        be in descending version order.  If False, results will be
        in ascending version order on a per-publisher, per-stem
        basis.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to."""

        return (
            (pub, stem, entry["version"])
            for pub, stem, entry in self.__iter_entries(last=last,
                ordered=ordered, pubs=pubs)
        )

    def tuple_entries(self, cb=None, last=False, ordered=False,
        pubs=EmptyI):
        """A generator function that produces tuples of the form
        ((pub, stem, version), entry) as it iterates over the contents
        of the catalog part (where entry is the related catalog entry
        for the fmri).  Callers should not modify any of the data
        that is returned.

        'cb' is an optional callback function that will be executed
        for each package.  It must accept two arguments: 'pkg' and
        'entry'.  'pkg' is an FMRI tuple and 'entry' is the
        dictionary structure of the catalog entry for the package.
        If the callback returns a false value, then the entry will
        not be included in the results.

        'last' is a boolean value that indicates only the last entry
        for each package on a per-publisher basis should be returned.
        As long as the CatalogPart has been saved since the last
        modifying operation, or sort() has been called, this will
        also be the newest version of the package.

        'ordered' is an optional boolean value that indicates that
        results should be sorted by stem and then by publisher and
        be in descending version order.  If False, results will be
        in ascending version order on a per-publisher, per-stem
        basis.

        'pubs' is an optional list of publisher prefixes to restrict
        the results to.

        Results are always in catalog version order on a per-
        publisher, per-stem basis."""

        for pub, stem, entry in self.__iter_entries(last=last,
            ordered=ordered, pubs=pubs):
            t = (pub, stem, entry["version"])
            if cb is None or cb(t, entry):
                yield t, entry

    def validate(self, signatures=None):
        """Verifies whether the signatures for the contents of the
        CatalogPart match the specified signature data, or if not
        provided, the current signature data.  Raises the exception
        named 'BadCatalogSignatures' on failure.  Loading the part's
        data, if that hasn't happened yet, may raise the exceptions
        documented for load()."""

        if not self.signatures and not signatures:
            # Nothing to validate.
            return

        # Ensure the content is loaded before generating signature
        # data for it; otherwise an unloaded part would be validated
        # as though it were empty.
        self.load()
        if not signatures:
            signatures = self.signatures

        new_signatures = self._gen_signatures(self.__data)
        if new_signatures != signatures:
            raise api_errors.BadCatalogSignatures(self.pathname)
class CatalogUpdate(CatalogPartBase):
    """A CatalogUpdate object is an augmented representation of a subset
    of the package data contained within a Catalog.  It records the
    operations performed on the catalog in a structure of the form
    {publisher: {stem: [entry, ...]}}."""

    # Properties.
    __data = None
    last_modified = None

    # Operation constants.
    ADD = "add"
    REMOVE = "remove"

    def __init__(self, name, meta_root=None, sign=True):
        """Initializes a CatalogUpdate object."""

        self.__data = {}
        CatalogPartBase.__init__(self, name, meta_root=meta_root,
            sign=sign)

    def add(self, pfmri, operation, op_time, metadata=None):
        """Records the specified catalog operation and any related
        catalog metadata for the specified package FMRI.

        'operation' must be one of the following constant values
        provided by the CatalogUpdate class:
            ADD
            REMOVE

        'op_time' is a UTC datetime object indicating the time
        the catalog operation was performed.

        'metadata' is an optional dict containing the catalog
        metadata that should be stored for the specified FMRI
        indexed by catalog part (e.g. "dependency", "summary",
        etc.).  If provided, it is used as the log entry itself (and
        so gains "op-time", "op-type" and "version" keys).

        Raises api_errors.AnarchicalCatalogFMRI if 'pfmri' has no
        publisher and api_errors.UnknownUpdateType if 'operation' is
        not recognized."""

        if not pfmri.publisher:
            raise api_errors.AnarchicalCatalogFMRI(pfmri.get_fmri())

        if operation not in (self.ADD, self.REMOVE):
            raise api_errors.UnknownUpdateType(operation)

        self.load()
        pkg_list = self.__data.setdefault(pfmri.publisher, {})
        ver_list = pkg_list.setdefault(pfmri.pkg_name, [])

        if metadata is not None:
            entry = metadata
        else:
            entry = {}
        entry["op-time"] = datetime_to_basic_ts(op_time)
        entry["op-type"] = operation
        entry["version"] = str(pfmri.version)
        ver_list.append(entry)

        # To ensure the update log is viewed as having been updated
        # at the exact same time as the catalog, the last_modified
        # time of the update log must match the operation time.
        self.last_modified = op_time
        self.signatures = {}

    def load(self):
        """Load and transform the catalog update's data, preparing it
        for use."""

        if self.loaded:
            # Already loaded, or only in-memory.
            return
        self.__data = CatalogPartBase.load(self)

    def publishers(self):
        """A generator function that returns publisher prefixes as it
        iterates over the package data in the CatalogUpdate."""

        self.load()
        for pub in self.__data:
            # Any entries starting with "_" are part of the
            # reserved catalog namespace.
            if pub[0] == "_":
                continue
            yield pub

    def save(self):
        """Transform and store the catalog update's data in a file
        using the pathname <self.meta_root>/<self.name>."""

        if not self.meta_root:
            # Assume this is in-memory only.
            return
        CatalogPartBase.save(self, self.__data)

    def updates(self):
        """A generator function that produces tuples of the format
        (fmri, op_type, op_time, metadata).  Where:

            * 'fmri' is a PkgFmri object for the package.

            * 'op_type' is a CatalogUpdate constant indicating
              the catalog operation performed.

            * 'op_time' is a UTC datetime object representing the
              time the catalog operation was performed.

            * 'metadata' is a dict containing the catalog metadata
              for the FMRI indexed by catalog part name.

        Results are produced in the order the operations were logged
        on a per-publisher, per-stem basis."""

        self.load()

        for pub in self.publishers():
            for stem in self.__data[pub]:
                for entry in self.__data[pub][stem]:
                    # Only the "catalog."-prefixed keys of a
                    # log entry are catalog part metadata.
                    mdata = dict(
                        (key, entry[key])
                        for key in entry
                        if key.startswith("catalog.")
                    )
                    op_time = basic_ts_to_datetime(
                        entry["op-time"])
                    pfmri = fmri.PkgFmri("%s@%s" % (stem,
                        entry["version"]), publisher=pub)
                    yield (pfmri, entry["op-type"], op_time,
                        mdata)

    def validate(self, signatures=None):
        """Verifies whether the signatures for the contents of the
        CatalogUpdate match the specified signature data, or if not
        provided, the current signature data.  Raises the exception
        named 'BadCatalogSignatures' on failure.  Loading the update
        log's data, if that hasn't happened yet, may raise the
        exceptions documented for load()."""

        if not self.signatures and not signatures:
            # Nothing to validate.
            return

        # Ensure the content is loaded before generating signature
        # data for it; otherwise an unloaded update log would be
        # validated as though it were empty.
        self.load()
        if not signatures:
            signatures = self.signatures

        new_signatures = self._gen_signatures(self.__data)
        if new_signatures != signatures:
            raise api_errors.BadCatalogSignatures(self.pathname)
class CatalogAttrs(CatalogPartBase):
    """A CatalogAttrs object is the representation of the attributes of a
    Catalog object.  The attributes are stored in a single dict and are
    exposed through the properties declared at the end of the class;
    changing any of them through its property discards the current
    signature data."""

    # Properties.
    __data = None

    def __init__(self, meta_root=None, sign=True):
        """Initializes a CatalogAttrs object, which is always named
        "catalog.attrs".  Existing attribute data is loaded
        immediately."""

        self.__data = {}
        CatalogPartBase.__init__(self, name="catalog.attrs",
            meta_root=meta_root, sign=sign)

        if not self.loaded:
            # Assume that the attributes of the catalog can be
            # obtained from a file.
            self.load()
            return

        # If the data is already seen as 'loaded' during init, this
        # is actually a new object, so setup some sane defaults.  The
        # "last-modified" value is present at this point because
        # CatalogPartBase.__init__ assigned last_modified, which is
        # a property of this class backed by self.__data.
        created = self.__data["last-modified"]
        self.__data = {
            "created": created,
            "last-modified": created,
            "package-count": 0,
            "package-version-count": 0,
            "parts": {},
            "updates": {},
            "version": 1,
        }

    # Accessors backing the properties declared at the end of the class.

    def __get_created(self):
        return self.__data["created"]

    def __get_last_modified(self):
        return self.__data["last-modified"]

    def __get_package_count(self):
        return self.__data["package-count"]

    def __get_package_version_count(self):
        return self.__data["package-version-count"]

    def __get_parts(self):
        return self.__data["parts"]

    def __get_updates(self):
        return self.__data["updates"]

    def __get_version(self):
        return self.__data["version"]

    # Every mutator also resets the signature data, since it no longer
    # describes the changed attributes.

    def __set_created(self, value):
        self.__data["created"] = value
        self.signatures = {}

    def __set_last_modified(self, value):
        self.__data["last-modified"] = value
        self.signatures = {}

    def __set_package_count(self, value):
        self.__data["package-count"] = value
        self.signatures = {}

    def __set_package_version_count(self, value):
        self.__data["package-version-count"] = value
        self.signatures = {}

    def __set_parts(self, value):
        self.__data["parts"] = value
        self.signatures = {}

    def __set_updates(self, value):
        self.__data["updates"] = value
        self.signatures = {}

    def __set_version(self, value):
        self.__data["version"] = value
        self.signatures = {}

    def __transform(self):
        """Duplicate and transform 'self.__data' for saving."""

        # Use a copy to prevent the in-memory version from being
        # affected by the transformations.
        struct = copy.deepcopy(self.__data)
        for key, val in struct.iteritems():
            if isinstance(val, datetime.datetime):
                # Convert datetime objects to an ISO-8601
                # basic format string.
                struct[key] = datetime_to_basic_ts(val)
            elif key in ("parts", "updates"):
                # Each part or update has its own attribute
                # dict, which may carry a modification time.
                for name in val:
                    info = val[name]
                    lm = info.get("last-modified", None)
                    if lm:
                        info["last-modified"] = \
                            datetime_to_basic_ts(lm)
        return struct

    def load(self):
        """Load and transform the catalog attribute data."""

        if self.loaded:
            # Already loaded, or only in-memory.
            return

        struct = CatalogPartBase.load(self)
        for key, val in struct.iteritems():
            if key in ("created", "last-modified"):
                # Convert ISO-8601 basic format strings to
                # datetime objects.  These dates can be
                # 'null' due to v0 catalog transformations.
                if val:
                    struct[key] = basic_ts_to_datetime(val)
            elif key in ("parts", "updates"):
                for name in val:
                    info = val[name]
                    lm = info.get("last-modified", None)
                    if lm:
                        info["last-modified"] = \
                            basic_ts_to_datetime(lm)
        self.__data = struct

    def save(self):
        """Transform and store the catalog attribute data in a file
        using the pathname <self.meta_root>/<self.name>."""

        if not self.meta_root:
            # Assume this is in-memory only.
            return
        CatalogPartBase.save(self, self.__transform())

    def validate(self, signatures=None):
        """Verifies whether the signatures for the contents of the
        CatalogAttrs match the specified signature data, or if not
        provided, the current signature data.  Raises the exception
        named 'BadCatalogSignatures' on failure."""

        if not self.signatures and not signatures:
            # Nothing to validate.
            return

        if not signatures:
            signatures = self.signatures

        # Signatures describe the serialized form of the attributes,
        # so the same transformation used by save() is applied first.
        new_signatures = self._gen_signatures(self.__transform())
        if new_signatures != signatures:
            raise api_errors.BadCatalogSignatures(self.pathname)

    created = property(__get_created, __set_created)

    last_modified = property(__get_last_modified, __set_last_modified)

    package_count = property(__get_package_count, __set_package_count)

    package_version_count = property(__get_package_version_count,
        __set_package_version_count)

    parts = property(__get_parts, __set_parts)

    updates = property(__get_updates, __set_updates)

    version = property(__get_version, __set_version)
class Catalog(object):
"""A Catalog is the representation of the package FMRIs available from
a package repository."""
# Names of the base and dependency catalog parts, and the prefix of the
# summary catalog part names (a locale name is appended to the prefix to
# form the full part name).
__BASE_PART = "catalog.base.C"
__DEPS_PART = "catalog.dependency.C"
__SUMM_PART_PFX = "catalog.summary"
# The file mode to be used for all catalog files (read and write for the
# owner, read-only for group and others).
__file_mode = stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IROTH
# These properties are declared here so that they show up in the pydoc
# documentation as private, and for clarity in the property declarations
# found near the end of the class definition.
_attrs = None
__batch_mode = None
__lock = None
__manifest_cb = None
__meta_root = None
__sign = None
# These are used to cache or store CatalogPart and CatalogUpdate objects
# as they are used.  It should not be confused with the CatalogPart
# names and CatalogUpdate names stored in the CatalogAttrs object.
__parts = None
__updates = None
# Class Constants
# Identifiers for the kinds of catalog data that callers can request
# through the 'info_needed' parameter of the catalog's methods.
DEPENDENCY, SUMMARY = range(2)
def __init__(self, batch_mode=False, meta_root=None, log_updates=False,
        manifest_cb=None, read_only=False, sign=True):
    """Initializes a Catalog object.

    'batch_mode' is an optional boolean value indicating that the
    caller intends to perform several modifying operations on the
    catalog before saving it.  This helps performance, since the
    catalog contents are then not sorted after each change and the
    package counts are not updated (except at save()).  The default
    is False.  When True, callers are responsible for calling
    finalize() so that catalog entries are in the correct order and
    the package counts accurately reflect the catalog contents.

    'meta_root' is an optional absolute pathname of an existing
    directory that catalog metadata can be written to and read
    from.  If no path is supplied, the catalog object is assumed to
    be used for in-memory operations only.

    'log_updates' is an optional boolean value indicating whether
    updates to the catalog should be logged, which enables
    consumers of the catalog to perform incremental updates.

    'manifest_cb' is an optional callback used by actions() and
    get_entry_actions() to lazy-load Manifest Actions if the
    catalog does not have the actions data for a requested package
    entry.

    'read_only' is an optional boolean value indicating whether
    operations that modify the catalog are disallowed (an assertion
    error is raised if one is attempted while this is True).

    'sign' is an optional boolean value indicating that the catalog
    data should have signature data generated and embedded when it
    is serialized.  It is primarily a convenience for callers that
    wish to trade integrity checks for improved catalog
    serialization performance."""

    # The caches and the plain private values have to be in place
    # before any of the assignments further down.
    self.__parts = {}
    self.__updates = {}
    self.__manifest_cb = manifest_cb
    self.__batch_mode = batch_mode

    # Must be set after the above.
    self.log_updates = log_updates
    self.meta_root = meta_root
    self.read_only = read_only
    self.sign = sign

    # Must be set after the above.
    self._attrs = CatalogAttrs(meta_root=self.meta_root, sign=sign)

    # Protects the catalog file from multiple threads writing to it
    # at the same time.
    self.__lock = threading.Lock()

    # Must be done last.
    self.__set_perms()
def __actions(self, info_needed, excludes=EmptyI, cb=None, locales=None,
        last_version=False, ordered=False, pubs=EmptyI):
    """Private generator that produces tuples of the form
    (fmri, actions) for each catalog entry matching the given
    criteria.

    'actions' is a generator over the entry's stored action data if
    the entry has any; otherwise a generator that lazy-loads the
    data using the manifest callback if one was set; otherwise an
    empty iterator.

    'info_needed' must not be empty.  'locales' defaults to
    set(('C',)) if not provided.  The remaining parameters are
    passed through to __entries()."""

    assert info_needed

    if locales:
        locales = set(locales)
    else:
        locales = set(("C",))

    matches = self.__entries(cb=cb, info_needed=info_needed,
        locales=locales, last_version=last_version, ordered=ordered,
        pubs=pubs)

    for pfmri, mdata in matches:
        if "actions" in mdata:
            acts = self.__gen_actions(mdata["actions"], excludes)
        elif self.__manifest_cb:
            acts = self.__gen_lazy_actions(pfmri, info_needed,
                locales, excludes)
        else:
            acts = EmptyI
        yield pfmri, acts
def __append(self, src, cb=None, pfmri=None, pubs=EmptyI):
    """Private version of append(); the caller is responsible for
    locking.

    Copies the BASE entries of the 'src' catalog into this catalog,
    and then the matching entries of every other part of 'src'.
    See append() for the meaning of 'cb', 'pfmri' and 'pubs'.

    Raises UnknownCatalogEntry if 'pfmri' was given and 'src' has no
    base part or no entry for it."""

    dest_base = self.get_part(self.__BASE_PART)
    src_base = src.get_part(self.__BASE_PART, must_exist=True)
    if src_base is None:
        if pfmri:
            raise api_errors.UnknownCatalogEntry(pfmri)
        # Nothing to do.
        return

    # A single operation time is used throughout so that the last
    # modification times stay synchronized (and to avoid creating
    # extra datetime objects).
    op_time = datetime.datetime.utcnow()

    if pfmri:
        entry = src_base.get_entry(pfmri)
        if entry is None:
            raise api_errors.UnknownCatalogEntry(
                pfmri.get_fmri())
        entries = [(pfmri, entry)]
    else:
        entries = src_base.entries()

    # Packages rejected by 'cb' are remembered here, keyed by
    # publisher and then package name, with a set of versions as
    # the value.
    discarded = {}
    for f, entry in entries:
        if pubs and f.publisher not in pubs:
            continue

        nentry = copy.deepcopy(entry)
        if cb is not None:
            merge, mdata = cb(src, f, entry)
            if not merge:
                by_name = discarded.setdefault(f.publisher, {})
                by_name.setdefault(f.pkg_name,
                    set()).add(f.version)
                continue

            if mdata:
                if "metadata" in nentry:
                    nentry["metadata"].update(mdata)
                else:
                    nentry["metadata"] = mdata
        dest_base.add(f, metadata=nentry, op_time=op_time)

    if discarded and pfmri:
        # The only package requested was rejected, so there is
        # nothing more to do.
        return

    # Finally, merge the entries of the remaining catalog parts,
    # leaving out any package that was discarded above.
    for name in src.parts.keys():
        if name == self.__BASE_PART:
            continue

        spart = src.get_part(name, must_exist=True)
        if spart is None:
            # Part doesn't exist in-memory or on-disk.
            continue

        if pfmri:
            entry = spart.get_entry(pfmri)
            if entry is None:
                # Package isn't in this part.
                continue
            entries = [(pfmri, entry)]
        else:
            entries = spart.entries()

        dpart = self.get_part(name)
        for f, entry in entries:
            if pubs and f.publisher not in pubs:
                continue

            skipped = discarded.get(f.publisher, {}).get(
                f.pkg_name, ())
            if f.version in skipped:
                continue

            dpart.add(f, metadata=copy.deepcopy(entry),
                op_time=op_time)
def __entries(self, cb=None, info_needed=EmptyI,
        last_version=False, locales=None, ordered=False, pubs=EmptyI,
        tuples=False):
    """Private generator that produces tuples of the form
    (key, metadata) for the entries of the base catalog part, with
    the data of the parts selected by 'info_needed' merged into
    'metadata'.

    If 'tuples' is True, 'key' is a (pub, stem, version) tuple;
    otherwise it is whatever the base part's entries() yields as
    the first element.  'locales' defaults to set(('C',)) if not
    provided.  'cb', 'last_version', 'ordered' and 'pubs' are
    passed through to the base part.  Nothing is produced if the
    base part does not exist."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is None:
        # Catalog contains nothing.
        return

    if locales:
        locales = set(locales)
    else:
        locales = set(("C",))

    # Collect the additional parts that have to be consulted.
    parts = []
    if self.DEPENDENCY in info_needed:
        dpart = self.get_part(self.__DEPS_PART, must_exist=True)
        if dpart is not None:
            parts.append(dpart)

    if self.SUMMARY in info_needed:
        for locale in locales:
            spart = self.get_part(
                "%s.%s" % (self.__SUMM_PART_PFX, locale),
                must_exist=True)
            if spart is not None:
                # Data is available for this locale.
                parts.append(spart)

    def merge_entry(src, dest):
        # Action lists are accumulated; 'version' is dropped;
        # anything else is simply copied.
        for key, val in src.iteritems():
            if key == "actions":
                dest.setdefault(key, [])
                dest[key] += val
            elif key != "version":
                dest[key] = val

    if tuples:
        for r, bentry in base.tuple_entries(cb=cb,
            last=last_version, ordered=ordered, pubs=pubs):
            pub, stem, ver = r
            mdata = {}
            merge_entry(bentry, mdata)
            for part in parts:
                entry = part.get_entry(pub=pub, stem=stem,
                    ver=ver)
                if entry is not None:
                    merge_entry(entry, mdata)
            yield r, mdata
        return

    for f, bentry in base.entries(cb=cb, last=last_version,
        ordered=ordered, pubs=pubs):
        mdata = {}
        merge_entry(bentry, mdata)
        for part in parts:
            entry = part.get_entry(f)
            if entry is not None:
                merge_entry(entry, mdata)
        yield f, mdata
def __finalize(self, pfmris=None, pubs=None, sort=True):
"""Private finalize method; exposes additional controls for
internal callers.

Updates the package counts stored in the catalog attributes using the
counts reported by the base catalog part; both counts are zero if the
base part does not exist.

'pfmris' and 'pubs' are passed through unchanged to the sort() method
of the cached catalog parts.

'sort' is an optional boolean value that indicates whether the cached
catalog parts should be sorted."""
package_count = 0
package_version_count = 0
part = self.get_part(self.__BASE_PART, must_exist=True)
if part is not None:
# If the base Catalog didn't exist (in-memory or on-
# disk) that implies there is nothing to sort and
# there are no packages (since the base catalog part
# must always exist for packages to be present).
package_count, package_version_count = \
part.get_package_counts()
if sort:
# Some operations don't need this, such as
# remove...
for part in self.__parts.values():
part.sort(pfmris=pfmris, pubs=pubs)
# Record the counts in the catalog attributes.
self._attrs.package_count = package_count
self._attrs.package_version_count = \
package_version_count
@staticmethod
def __gen_actions(actions, excludes=EmptyI):
    """Generator that turns each action string in 'actions' into an
    Action object and yields those that apply.

    Set actions whose name starts with 'facet' or 'variant' are
    always yielded; every other action is only yielded if its
    include_this() method accepts 'excludes'."""

    for text in actions:
        act = pkg.actions.fromstr(text)
        if act.name == "set":
            aname = act.attrs["name"]
            if aname.startswith("facet") or \
                aname.startswith("variant"):
                # Don't filter actual facet or variant
                # set actions.
                yield act
                continue
        if act.include_this(excludes):
            yield act
def __gen_lazy_actions(self, f, info_needed, locales=EmptyI,
        excludes=EmptyI):
    """Generator that yields the Actions requested by 'info_needed'
    for the package 'f', using the Manifest returned by the
    catalog's manifest callback.  Nothing is yielded if the callback
    does not return a Manifest.

    'f' identifies the package and is passed to the manifest
    callback along with the catalog itself.

    'info_needed' is a collection of catalog constants:

        DEPENDENCY
            Depend actions, and set actions whose name starts
            with 'variant', 'facet' or 'pkg.depend.', or is
            'pkg.obsolete' or 'pkg.renamed'.

        SUMMARY
            Any other set actions, except for 'fmri' and
            'pkg.fmri'.  Locale-specific set actions (those
            with a ':<locale>' suffix in the name) are only
            yielded if the locale is in 'locales'.

    'excludes' is used to filter the Manifest's actions.

    Raises RuntimeError if 'info_needed' contains neither
    constant."""

    # Note that the logic below must be kept in sync with
    # group_actions found in add_package.
    m = self.__manifest_cb(self, f)
    if not m:
        # If the manifest callback returns None, then
        # assume there is no action data to yield.
        return

    want_deps = Catalog.DEPENDENCY in info_needed
    want_summary = Catalog.SUMMARY in info_needed

    if want_deps:
        atypes = ("depend", "set")
    elif want_summary:
        atypes = ("set",)
    else:
        # The message has to be translated before it is
        # formatted, otherwise the lookup is done with the
        # already formatted text.  The argument is wrapped in
        # a tuple so that a tuple value is formatted as a
        # whole.
        raise RuntimeError(_("Unknown info_needed "
            "type: %s") % (info_needed,))

    for a, attr_name in self.__gen_manifest_actions(m, atypes,
        excludes):
        if (a.name == "depend" or \
            attr_name.startswith("variant") or \
            attr_name.startswith("facet") or \
            attr_name.startswith("pkg.depend.") or \
            attr_name in ("pkg.obsolete",
            "pkg.renamed")):
            # Dependency data; never part of the summary.
            if want_deps:
                yield a
        elif want_summary and a.name == "set":
            if attr_name in ("fmri", "pkg.fmri"):
                # Redundant in the case of the catalog.
                continue

            comps = attr_name.split(":")
            if len(comps) > 1:
                # 'set' is locale-specific.
                if comps[1] not in locales:
                    continue
            yield a
@staticmethod
def __gen_manifest_actions(m, atypes, excludes):
    """Private helper generator that walks the actions of the
    Manifest 'm' one action type at a time (in the order given by
    'atypes'), skipping those rejected by include_this(excludes).

    Yields tuples of (action, attr_name), where attr_name is the
    value of the 'name' attribute for 'set' actions and None for
    every other action type."""

    for atype in atypes:
        is_set = atype == "set"
        for act in m.gen_actions_by_type(atype):
            if not act.include_this(excludes):
                continue
            if is_set:
                attr_name = act.attrs["name"]
            else:
                attr_name = None
            yield act, attr_name
def __get_batch_mode(self):
    """Return the catalog's current batch mode setting."""

    mode = self.__batch_mode
    return mode
def __get_last_modified(self):
    """Return the last modification time recorded in the catalog
    attributes."""

    attrs = self._attrs
    return attrs.last_modified
def __get_meta_root(self):
    """Return the pathname of the catalog's metadata directory, as
    stored by __set_meta_root()."""

    root = self.__meta_root
    return root
def __get_sign(self):
    """Return the catalog's current 'sign' setting, as stored by
    __set_sign()."""

    sign = self.__sign
    return sign
def __get_update(self, name, cache=True, must_exist=False):
    """Return the CatalogUpdate object for the update log 'name'.

    A previously cached object is returned if there is one;
    otherwise a new one is created.

    'cache' is an optional boolean value indicating whether a newly
    created object should be added to the cache.

    'must_exist' is an optional boolean value; if True, None is
    returned for an update log that is not cached and either cannot
    exist on-disk (no meta_root) or does not exist there."""

    cached = self.__updates.get(name, None)
    if cached is not None:
        return cached

    if must_exist and not self.meta_root:
        # In-memory only, so an update that isn't cached
        # doesn't exist.
        return None

    # The update hasn't been cached, so create an object for it.
    ulog = CatalogUpdate(name, meta_root=self.meta_root,
        sign=self.__sign)

    if must_exist and self.meta_root and not ulog.exists:
        # Update doesn't exist on-disk.
        return None

    if cache:
        self.__updates[name] = ulog
    return ulog
def __get_version(self):
    """Return the version recorded in the catalog attributes."""

    attrs = self._attrs
    return attrs.version
def __lock_catalog(self):
    """Acquires the catalog's lock, so that other threads cannot
    modify the catalog while an operation is in progress.  Must be
    paired with a call to __unlock_catalog()."""

    # XXX need filesystem lock too?
    lock = self.__lock
    lock.acquire()
def __log_update(self, pfmri, operation, op_time, entries=None):
    """Helper function to record a change made to the catalog.

    The catalog attributes are always brought up to date; the
    change is only written to the update logs if log_updates is
    enabled.

    'pfmri' is the FMRI of the package the change applies to.

    'operation' is the type of change (passed to the update log).

    'op_time' is the time of the change; it is used for both the
    catalog and the update logs.

    'entries' maps catalog part names to the entry data that was
    changed in each part."""

    if not self.__batch_mode:
        # The catalog.attrs needs to reflect the changes made.
        # No sort is needed here as the individual parts take
        # care of that themselves in this case.
        self.__finalize(sort=False)

    # This must be exactly the same time as used for the update
    # logs, so that neither the logs nor the catalog appear newer
    # than the other.
    attrs = self._attrs
    attrs.last_modified = op_time

    if not self.log_updates:
        return

    # Group the changed entries by locale, which is the last
    # component of the catalog part name.
    by_locale = {}
    for pname in entries:
        locale = pname.split(".", 2)[2]
        by_locale.setdefault(locale, {})[pname] = entries[pname]

    stamp = datetime_to_update_ts(op_time)
    for locale, metadata in by_locale.iteritems():
        name = "update.%s.%s" % (stamp, locale)
        self.__get_update(name).add(pfmri, operation,
            metadata=metadata, op_time=op_time)
        attrs.updates[name] = { "last-modified": op_time }

    for name, part in self.__parts.iteritems():
        # Signature data for each part is cleared here, and
        # will only be available again after save().
        attrs.parts[name] = {
            "last-modified": part.last_modified
        }
def __save(self):
    """Private save function; the caller is responsible for
    locking the catalog.

    Saves the cached update logs (only if log_updates is enabled)
    and catalog parts, records their modification times and
    signatures in the catalog attributes, and then saves the
    attributes themselves."""

    attrs = self._attrs

    def record(table, name, obj):
        # Replace the existing data for 'name' with the
        # current modification time and signatures of 'obj'.
        entry = table[name] = {
            "last-modified": obj.last_modified
        }
        for sname, sval in obj.signatures.iteritems():
            entry["signature-%s" % sname] = sval

    if self.log_updates:
        for name, ulog in self.__updates.iteritems():
            ulog.save()
            record(attrs.updates, name, ulog)

    # Save any CatalogParts that are currently in-memory.
    for name, part in self.__parts.iteritems():
        # Must save first so that signature data is current.
        part.save()
        record(attrs.parts, name, part)

    # Finally, save the catalog attributes.
    attrs.save()
def __set_batch_mode(self, value):
    """Store the batch mode setting, and set the 'ordered' attribute
    of every cached catalog part to its logical opposite."""

    self.__batch_mode = value
    ordered = not value
    for part in self.__parts.values():
        part.ordered = ordered
def __set_last_modified(self, value):
    """Store 'value' as the last modification time in the catalog
    attributes."""

    attrs = self._attrs
    attrs.last_modified = value
def __set_meta_root(self, pathname):
"""Store 'pathname' as the pathname of the catalog's metadata
directory.  A non-empty pathname is converted to an absolute pathname
first; an empty value (such as None) is stored as is."""
if pathname:
pathname = os.path.abspath(pathname)
self.__meta_root = pathname
# If the Catalog's meta_root changes, the meta_root of all of
# its parts must be changed too.
if self._attrs:
self._attrs.meta_root = pathname
# The same applies to the cached CatalogPart objects...
for part in self.__parts.values():
part.meta_root = pathname
# ...and to the cached CatalogUpdate objects.
for ulog in self.__updates.values():
ulog.meta_root = pathname
def __set_perms(self):
"""Sets permissions on attrs and parts if not read_only and if
the current user can do so; raises BadCatalogPermissions if the
permissions are wrong and cannot be corrected."""
if not self.meta_root:
# Nothing to do.
return
files = [self._attrs.name]
files.extend(self._attrs.parts.keys())
files.extend(self._attrs.updates.keys())
# Force file_mode, so that unprivileged users can read these.
bad_modes = []
for name in files:
pathname = os.path.join(self.meta_root, name)
try:
if self.read_only:
fmode = stat.S_IMODE(os.stat(
pathname).st_mode)
if fmode != self.__file_mode:
bad_modes.append((pathname,
"%o" % self.__file_mode,
"%o" % fmode))
else:
os.chmod(pathname, self.__file_mode)
except EnvironmentError, e:
# If the file doesn't exist yet, move on.
if e.errno == errno.ENOENT:
continue
# If the mode change failed for another reason,
# check to see if we actually needed to change
# it, and if so, add it to bad_modes.
fmode = stat.S_IMODE(os.stat(
pathname).st_mode)
if fmode != self.__file_mode:
bad_modes.append((pathname,
"%o" % self.__file_mode,
"%o" % fmode))
if bad_modes:
raise api_errors.BadCatalogPermissions(bad_modes)
def __set_sign(self, value):
"""Store 'value' as the catalog's 'sign' setting."""
self.__sign = value
# If the Catalog's sign property changes, the value of that
# property for its attributes, etc. must be changed too.
if self._attrs:
self._attrs.sign = value
# The same applies to the cached CatalogPart objects...
for part in self.__parts.values():
part.sign = value
# ...and to the cached CatalogUpdate objects.
for ulog in self.__updates.values():
ulog.sign = value
def __set_version(self, value):
    """Store 'value' as the version in the catalog attributes."""

    attrs = self._attrs
    attrs.version = value
def __unlock_catalog(self):
    """Releases the catalog's lock, allowing other catalog consumers
    to modify it again."""

    # XXX need filesystem unlock too?
    lock = self.__lock
    lock.release()
def actions(self, info_needed, excludes=EmptyI, cb=None,
        last=False, locales=None, ordered=False, pubs=EmptyI):
    """A generator function that produces tuples of the format
    (fmri, actions) as it iterates over the contents of the
    catalog, where 'actions' is a generator that returns the
    Actions corresponding to the requested information.

    If the catalog has no action data for a package entry and a
    manifest_cb was given when the Catalog was created, the action
    data is lazy-loaded by the actions generator; otherwise an
    empty iterator is returned for that entry.  As a consequence,
    manifest_cb is executed even for packages that have no actions
    corresponding to info_needed (for example, a package without
    any dependencies).  This was considered a reasonable compromise
    as packages are generally expected to have DEPENDENCY and
    SUMMARY information.

    'info_needed' is a set of one or more catalog constants
    indicating the types of catalog data that will be returned in
    'actions':

        DEPENDENCY
            Depend and set Actions for package obsoletion,
            renaming, variants.

        SUMMARY
            Any remaining set Actions not listed above, such
            as pkg.summary, pkg.description, etc.

    'excludes' is a list of variants which will be used to
    determine what should be allowed by the actions generator in
    addition to what is specified by 'info_needed'.

    'cb' is an optional callback function that will be executed
    for each package before its action data is retrieved.  It must
    accept two arguments: 'pkg' and 'entry'.  'pkg' is an FMRI
    object and 'entry' is the dictionary structure of the catalog
    entry for the package.  If the callback returns False, the
    entry is not included in the results.  This can significantly
    improve performance by avoiding action data retrieval for
    results that will not be used.

    'last' is a boolean value that indicates only the last entry
    for each package on a per-publisher basis should be returned.
    As long as the catalog has been saved since the last modifying
    operation, or finalize() has been called, this will also be
    the newest version of the package.

    'locales' is an optional set of locale names for which Actions
    should be returned.  The default is set(('C',)) if not
    provided.

    'ordered' is an optional boolean value that indicates that
    results should be sorted by stem and then by publisher and be
    in descending version order.  If False, results will be in
    ascending version order on a per-publisher, per-stem basis.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    results = self.__actions(info_needed, cb=cb, excludes=excludes,
        last_version=last, locales=locales, ordered=ordered,
        pubs=pubs)
    return results
def add_package(self, pfmri, manifest=None, metadata=None):
    """Add a package and its related metadata to the catalog and
    its parts as needed.  The catalog must not be read_only.

    'pfmri' is the FMRI of the package to add.

    'manifest' is an optional Manifest object that will be used to
    retrieve the metadata related to the package.  Without it, only
    the base catalog data can be populated.

    'metadata' is an optional dict of additional metadata to store
    with the package's BASE record."""

    assert not self.read_only

    def group_actions(actions):
        # Sorts the string form of each action into the catalog
        # part it belongs to.  Note that the logic here is
        # mirrored by __gen_lazy_actions.
        dependency = { "C": [] }

        # Summary actions are grouped by locale, since each
        # locale has its own catalog part.
        summary = { "C": [] }

        for act in actions:
            if act.name == "depend":
                dependency["C"].append(str(act))
                continue

            name = act.attrs["name"]
            if name in ("pkg.obsolete", "pkg.renamed") or \
                name.startswith("variant") or \
                name.startswith("facet") or \
                name.startswith("pkg.depend."):
                # This data goes to the dependency
                # catalog part.
                dependency["C"].append(str(act))
                continue

            if name in ("fmri", "pkg.fmri"):
                # Redundant in the case of the catalog.
                continue

            # Every other set action goes to a summary
            # catalog part.  The locale is whatever follows
            # the ':' in the attribute's name; if there is
            # no ':', the 'C' locale is assumed.
            comps = name.split(":")
            if len(comps) > 1:
                locale = comps[1]
            else:
                locale = "C"
            summary.setdefault(locale, []).append(str(act))

        return {
            "dependency": dependency,
            "summary": summary,
        }

    self.__lock_catalog()
    try:
        entries = {}

        # A single operation time is used for everything so
        # that the last modification times of all catalog parts
        # and update logs are synchronized.
        op_time = datetime.datetime.utcnow()

        # Packages are always added to the base catalog.
        base_entry = {}
        if metadata:
            base_entry["metadata"] = metadata
        if manifest:
            for k, v in manifest.signatures.iteritems():
                base_entry["signature-%s" % k] = v
        part = self.get_part(self.__BASE_PART)
        entries[part.name] = part.add(pfmri, metadata=base_entry,
            op_time=op_time)

        if manifest:
            # Only dependency and set actions are currently
            # used by the remaining catalog parts.
            actions = []
            for atype in "depend", "set":
                actions += manifest.gen_actions_by_type(
                    atype)

            grouped = group_actions(actions)
            for ctype, by_locale in grouped.iteritems():
                for locale, acts in by_locale.iteritems():
                    if not acts:
                        # Entries are only added
                        # if there are actions
                        # for this ctype.
                        continue

                    part = self.get_part("catalog"
                        ".%s.%s" % (ctype, locale))
                    entries[part.name] = part.add(
                        pfmri,
                        metadata={ "actions": acts },
                        op_time=op_time)

        self.__log_update(pfmri, CatalogUpdate.ADD, op_time,
            entries=entries)
    finally:
        self.__unlock_catalog()
def append(self, src, cb=None, pfmri=None, pubs=EmptyI):
    """Appends the entries in the specified 'src' catalog to those
    of the current catalog.  The caller is responsible for ensuring
    that no duplicates exist and must call finalize() afterwards to
    ensure consistent catalog state.  This function cannot be used
    when log_updates or read_only is enabled.

    'cb' is an optional callback function that must accept src, an
    FMRI, and entry, where 'src' is the source catalog the FMRI's
    entry is being copied from and 'entry' is the source catalog
    entry.  It must return a tuple of the form (append, metadata),
    where 'append' is a boolean value indicating if the specified
    package should be appended, and 'metadata' is a dict of
    additional metadata to store with the package's BASE record.

    'pfmri' is an optional FMRI of a package to append.  If not
    provided, all FMRIs in the 'src' catalog will be appended.
    This filtering is applied before any provided callback.

    'pubs' is an optional list of publisher prefixes to restrict
    the append operation to.  FMRIs that have a publisher not in
    the list will be skipped.  This filtering is applied before any
    provided callback.  If not provided, no publisher filtering
    will be applied."""

    assert not (self.log_updates or self.read_only)

    self.__lock_catalog()
    try:
        # Appending is much slower without batch mode, so it is
        # turned on for the duration of the operation and the
        # previous setting is put back on completion or
        # failure.  This is safe because append() is never used
        # as part of the publication process (log_updates ==
        # True).
        saved_mode = self.batch_mode
        self.batch_mode = True
        self.__append(src, cb=cb, pfmri=pfmri, pubs=pubs)
    finally:
        self.batch_mode = saved_mode
        self.__unlock_catalog()
def apply_updates(self, path):
    """Apply any CatalogUpdates available to the catalog based on
    the list returned by get_updates_needed.  The caller must
    retrieve all of the resources indicated by get_updates_needed
    and place them in the directory indicated by 'path'.

    After the updates have been applied, the in-memory catalog
    parts are validated against the signatures found in the
    catalog attributes file in 'path', the catalog is saved, and
    that attributes file is copied into place and loaded.

    Raises CatalogUpdateRequirements if the catalog has no
    meta_root, and UnknownUpdateType if an update log contains an
    operation that is neither an add nor a remove."""

    if not self.meta_root:
        raise api_errors.CatalogUpdateRequirements()

    # Used to store the original time each part was modified
    # as a basis for determining whether to apply specific
    # updates.
    old_parts = self._attrs.parts

    def apply_incremental(name):
        # Load the CatalogUpdate from the path specified.
        # (Which is why __get_update is not used.)
        ulog = CatalogUpdate(name, meta_root=path)
        for pfmri, op_type, op_time, metadata in ulog.updates():
            for pname, pdata in metadata.iteritems():
                part = self.get_part(pname,
                    must_exist=True)
                if part is None:
                    # Part doesn't exist; skip.
                    continue

                lm = old_parts[pname]["last-modified"]
                if op_time <= lm:
                    # Only add updates to the part
                    # that occurred after the last
                    # time it was originally
                    # modified.
                    continue

                if op_type == CatalogUpdate.ADD:
                    part.add(pfmri, metadata=pdata,
                        op_time=op_time)
                elif op_type == CatalogUpdate.REMOVE:
                    part.remove(pfmri,
                        op_time=op_time)
                else:
                    raise api_errors.UnknownUpdateType(
                        op_type)

    def apply_full(name):
        # A full update simply replaces the file.
        src = os.path.join(path, name)
        dest = os.path.join(self.meta_root, name)
        portable.copyfile(src, dest)

    self.__lock_catalog()
    try:
        old_batch_mode = self.batch_mode
        self.batch_mode = True

        updates = self.get_updates_needed(path)
        if updates is None:
            # Nothing has changed, so nothing to do.
            return

        for name in updates:
            if name.startswith("update."):
                # The provided update is an incremental.
                apply_incremental(name)
            else:
                # The provided update is a full update.
                apply_full(name)

        # Next, verify that all of the updated parts have a
        # signature that matches the new catalog.attrs file.
        new_attrs = CatalogAttrs(meta_root=path)
        new_sigs = {}
        for name, mdata in new_attrs.parts.iteritems():
            new_sigs[name] = {}
            for key in mdata:
                if not key.startswith("signature-"):
                    continue
                # Only strip the leading prefix, so that
                # a signature name containing the same
                # text is not truncated.
                sig = key.split("signature-", 1)[1]
                new_sigs[name][sig] = mdata[key]

        # This must be done to ensure that the catalog
        # signature matches that of the source.
        self.batch_mode = old_batch_mode
        self.finalize()

        for name, part in self.__parts.iteritems():
            part.validate(signatures=new_sigs[name])

        # Finally, save the catalog, and then copy the new
        # catalog attributes file into place and reload it.
        self.__save()
        apply_full(self._attrs.name)

        self._attrs = CatalogAttrs(meta_root=self.meta_root)
        self.__set_perms()
    finally:
        self.batch_mode = old_batch_mode
        self.__unlock_catalog()
def categories(self, excludes=EmptyI, pubs=EmptyI):
    """Returns a set of tuples of the form (scheme, category)
    containing the names of all categories in use by the last
    version of each unique package in the catalog on a
    per-publisher basis.

    'excludes' is a list of variants which will be used to
    determine what category actions will be checked.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    found = set()
    for pfmri, pacts in self.__actions([self.SUMMARY],
        excludes=excludes, last_version=True, pubs=pubs):
        for act in pacts:
            if act.has_category_info():
                found.update(act.parse_category_info())
    return found
@property
def created(self):
    """The time the catalog was created, as a UTC datetime object
    (taken from the catalog attributes)."""

    attrs = self._attrs
    return attrs.created
def destroy(self):
"""Removes any on-disk files that exist for the catalog and
discards all content."""
for name in self._attrs.parts:
part = self.get_part(name)
part.destroy()
for name in self._attrs.updates:
ulog = self.__get_update(name, cache=False)
ulog.destroy()
self._attrs = CatalogAttrs(meta_root=self.meta_root,
sign=self.__sign)
self.__parts = {}
self.__updates = {}
self._attrs.destroy()
if not self.meta_root or not os.path.exists(self.meta_root):
return
# Finally, ensure that if there are any leftover files from
# an interrupted destroy in the past that they are removed
# as well.
for fname in os.listdir(self.meta_root):
if not fname.startswith("catalog.") and \
not fname.startswith("update."):
continue
pname = os.path.join(self.meta_root, fname)
if not os.path.isfile(pname):
continue
try:
portable.remove(pname)
except EnvironmentError, e:
if e.errno == errno.EACCES:
raise api_errors.PermissionsException(
e.filename)
if e.errno == errno.EROFS:
raise api_errors.ReadOnlyFileSystemException(
e.filename)
raise
def entries(self, info_needed=EmptyI, last=False, locales=None,
        ordered=False, pubs=EmptyI):
    """A generator function that produces tuples of the format
    (fmri, metadata) as it iterates over the contents of the
    catalog, where 'metadata' is a dict containing the requested
    information.

    'metadata' always contains the following information at a
    minimum:

        BASE
            'metadata' will be populated with Manifest
            signature data, if available, using key-value
            pairs of the form 'signature-<name>': value.

    'info_needed' is an optional list of one or more catalog
    constants indicating the types of catalog data that will be
    returned in 'metadata' in addition to the above:

        DEPENDENCY
            'metadata' will contain depend and set Actions
            for package obsoletion, renaming, variants,
            and facets stored in a list under the
            key 'actions'.

        SUMMARY
            'metadata' will contain any remaining Actions
            not listed above, such as pkg.summary,
            pkg.description, etc. in a list under the key
            'actions'.

    'last' is a boolean value that indicates only the last entry
    for each package on a per-publisher basis should be returned.
    As long as the catalog has been saved since the last modifying
    operation, or finalize() has been called, this will also be
    the newest version of the package.

    'locales' is an optional set of locale names for which Actions
    should be returned.  The default is set(('C',)) if not
    provided.  Note that unlike actions(), catalog entries will not
    lazy-load action data if it is missing from the catalog.

    'ordered' is an optional boolean value that indicates that
    results should be sorted by stem and then by publisher and be
    in descending version order.  If False, results will be in
    ascending version order on a per-publisher, per-stem basis.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    results = self.__entries(info_needed=info_needed,
        last_version=last, locales=locales, ordered=ordered,
        pubs=pubs)
    return results
def entries_by_version(self, name, info_needed=EmptyI, locales=None,
        pubs=EmptyI):
    """A generator function that produces tuples of the format
    (version, entries) as it iterates over the contents of the
    catalog, where 'entries' is a list of tuples of the format
    (fmri, metadata) and 'metadata' is a dict containing the
    requested information.

    'metadata' always contains the following information at a
    minimum:

        BASE
            'metadata' will be populated with Manifest
            signature data, if available, using key-value
            pairs of the form 'signature-<name>': value.

    'name' is passed to the base catalog part to select the entries
    to return (presumably the name of the package; confirm against
    the catalog part's entries_by_version()).

    'info_needed' is an optional list of one or more catalog
    constants indicating the types of catalog data that will be
    returned in 'metadata' in addition to the above:

        DEPENDENCY
            'metadata' will contain depend and set Actions
            for package obsoletion, renaming, variants,
            and facets stored in a list under the
            key 'actions'.

        SUMMARY
            'metadata' will contain any remaining Actions
            not listed above, such as pkg.summary,
            pkg.description, etc. in a list under the key
            'actions'.

    'locales' is an optional set of locale names for which Actions
    should be returned.  The default is set(('C',)) if not
    provided.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is None:
        # Catalog contains nothing.
        return

    if locales:
        locales = set(locales)
    else:
        locales = set(("C",))

    # Collect the additional parts that have to be consulted.
    parts = []
    if self.DEPENDENCY in info_needed:
        dpart = self.get_part(self.__DEPS_PART, must_exist=True)
        if dpart is not None:
            parts.append(dpart)

    if self.SUMMARY in info_needed:
        for locale in locales:
            spart = self.get_part(
                "%s.%s" % (self.__SUMM_PART_PFX, locale),
                must_exist=True)
            if spart is not None:
                # Data is available for this locale.
                parts.append(spart)

    def merge_entry(src, dest):
        # Action lists are accumulated; 'version' is dropped;
        # anything else is simply copied.
        for key, val in src.iteritems():
            if key == "actions":
                dest.setdefault(key, [])
                dest[key] += val
            elif key != "version":
                dest[key] = val

    for ver, ventries in base.entries_by_version(name, pubs=pubs):
        merged = []
        for f, bentry in ventries:
            mdata = {}
            merge_entry(bentry, mdata)
            for part in parts:
                entry = part.get_entry(f)
                if entry is not None:
                    merge_entry(entry, mdata)
            merged.append((f, mdata))
        yield ver, merged
def entry_actions(self, info_needed, excludes=EmptyI, cb=None,
    last=False, locales=None, ordered=False, pubs=EmptyI):
    """A generator function that yields tuples of the format
    ((pub, stem, version), entry, actions) for each package entry
    in the catalog; 'actions' is an iterable producing the Actions
    that correspond to the requested information.

    When an entry carries no action data and a manifest_cb was
    supplied at Catalog creation time, the returned 'actions'
    iterable lazy-loads the data through that callback; without a
    callback an empty iterator is returned instead.  As a
    consequence the manifest_cb is executed even for packages that
    have no actions matching 'info_needed' (for example, a package
    without dependencies).

    'info_needed' is a set of one or more catalog constants
    indicating the types of catalog data to return in 'actions':

            DEPENDENCY
                    Depend and set Actions for package obsoletion,
                    renaming, variants.

            SUMMARY
                    Any remaining set Actions not listed above, such
                    as pkg.summary, pkg.description, etc.

    'excludes' is a list of variants which will be used to determine
    what should be allowed by the actions generator in addition to
    what is specified by 'info_needed'.

    'cb' is an optional callback executed for each package before
    its action data is retrieved.  It must accept two arguments,
    'pkg' (an FMRI object) and 'entry' (the catalog entry dict).  If
    it returns False the entry is left out of the results, which
    avoids retrieving action data that will not be used.

    'last' is a boolean value that indicates only the last entry
    for each package on a per-publisher basis should be returned.
    As long as the catalog has been saved since the last modifying
    operation, or finalize() has been called, this will also be
    the newest version of the package.

    'locales' is an optional set of locale names for which Actions
    should be returned.  The default is set(('C',)) if not provided.

    'ordered' is an optional boolean value that indicates that
    results should be sorted by stem and then by publisher and
    be in descending version order.  If False, results will be
    in ascending version order on a per-publisher, per-stem
    basis.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    results = self.__entries(cb=cb, info_needed=info_needed,
        locales=locales, last_version=last, ordered=ordered,
        pubs=pubs, tuples=True)

    for t, entry in results:
        if "actions" in entry:
            # Action data is already present in the catalog.
            actions = self.__gen_actions(entry["actions"],
                excludes)
        elif self.__manifest_cb:
            # Defer to the manifest callback; an FMRI object
            # has to be rebuilt from the tuple for it.
            pub, stem, ver = t
            pfmri = fmri.PkgFmri("%s@%s" % (stem, ver),
                publisher=pub)
            actions = self.__gen_lazy_actions(pfmri,
                info_needed, locales, excludes)
        else:
            actions = EmptyI
        yield t, entry, actions
@property
def exists(self):
    """A boolean value indicating whether the Catalog exists
    on-disk."""

    # The catalog is considered present whenever its attrs file is;
    # the answer is delegated to the CatalogAttrs object.
    return self._attrs.exists
def finalize(self, pfmris=None, pubs=None):
    """Re-sort the contents of the Catalog so that version entries
    are in the correct order, and set the package counts for the
    Catalog based on its current contents.

    'pfmris' is an optional set of FMRIs that indicate what package
    entries have been changed since this function was last called.
    It is used to optimize the finalization process.

    'pubs' is an optional set of publisher prefixes that indicate
    what publisher has had package entries changed.  It is used
    to optimize the finalization process.  This option has no effect
    if 'pfmris' is also provided."""

    # Public wrapper around the private implementation.
    result = self.__finalize(pfmris=pfmris, pubs=pubs)
    return result
def fmris(self, last=False, objects=True, ordered=False, pubs=EmptyI):
    """Returns an iterator that produces FMRIs as it walks the
    contents of the catalog.

    'last' is a boolean value that indicates only the last FMRI
    for each package on a per-publisher basis should be returned.
    As long as the catalog has been saved since the last modifying
    operation, or finalize() has been called, this will also be
    the newest version of the package.

    'objects' is an optional boolean value indicating whether
    FMRIs should be returned as FMRI objects or as strings.

    'ordered' is an optional boolean value that indicates that
    results should be sorted by stem and then by publisher and
    be in descending version order.  If False, results will be
    in ascending version order on a per-publisher, per-stem
    basis.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is not None:
        return base.fmris(last=last, objects=objects,
            ordered=ordered, pubs=pubs)

    # Catalog contains nothing; hand back an empty iterator so
    # that callers expecting a generator still work.
    return iter(())
def fmris_by_version(self, name, pubs=EmptyI):
    """Returns an iterator that produces tuples of (version,
    fmris), where fmris is a list of the fmris related to the
    version, for the given package name.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is not None:
        return base.fmris_by_version(name, pubs=pubs)

    # Catalog contains nothing; hand back an empty iterator so
    # that callers expecting a generator still work.
    return iter(())
def get_entry(self, pfmri, info_needed=EmptyI, locales=None):
    """Returns a dict containing the metadata for the specified
    FMRI containing the requested information.  If the specified
    FMRI does not exist in the catalog, a value of None will be
    returned.

    'metadata' always contains the following information at a
    minimum:

            BASE
                    'metadata' will be populated with Manifest
                    signature data, if available, using key-value
                    pairs of the form 'signature-<name>': value.

    'info_needed' is an optional list of one or more catalog
    constants indicating the types of catalog data that will
    be returned in 'metadata' in addition to the above:

            DEPENDENCY
                    'metadata' will contain depend and set Actions
                    for package obsoletion, renaming, variants,
                    and facets stored in a list under the
                    key 'actions'.

            SUMMARY
                    'metadata' will contain any remaining Actions
                    not listed above, such as pkg.summary,
                    pkg.description, etc. in a list under the key
                    'actions'.

    'locales' is an optional set of locale names for which Actions
    should be returned.  The default is set(('C',)) if not provided.
    """

    def absorb(src, dest):
        # Fold one part's record into the combined result:
        # action lists accumulate, 'version' is dropped, and
        # every other key is copied over.
        for key, val in src.iteritems():
            if key == "version":
                continue
            if key == "actions":
                dest.setdefault(key, [])
                dest[key] += val
            else:
                dest[key] = val

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is None:
        return None

    if locales:
        locales = set(locales)
    else:
        locales = set(("C",))

    # FMRIs must be present in the BASE catalog part, so that
    # record is always looked up first.
    bentry = base.get_entry(pfmri)
    if bentry is None:
        return None
    mdata = {}
    absorb(bentry, mdata)

    # Collect the optional parts that were requested and that are
    # actually available.
    extra_parts = []
    if self.DEPENDENCY in info_needed:
        deps = self.get_part(self.__DEPS_PART, must_exist=True)
        if deps is not None:
            extra_parts.append(deps)

    if self.SUMMARY in info_needed:
        for locale in locales:
            summ = self.get_part(
                "%s.%s" % (self.__SUMM_PART_PFX, locale),
                must_exist=True)
            if summ is not None:
                extra_parts.append(summ)
            # Otherwise no data is available for this locale.

    for part in extra_parts:
        entry = part.get_entry(pfmri)
        if entry is not None:
            absorb(entry, mdata)
        # A part that lacks this FMRI is simply skipped.
    return mdata
def get_entry_actions(self, pfmri, info_needed, excludes=EmptyI,
    locales=None):
    """Returns an iterable that produces the Actions stored in the
    catalog entry of the specified FMRI that correspond to the
    requested information.  If the catalog doesn't contain any
    action data for the package entry, and manifest_cb was defined
    at Catalog creation time, the action data will be lazy-loaded
    by the returned generator; otherwise an empty iterator is
    returned.

    Raises UnknownCatalogEntry if the FMRI is not in the catalog.

    'excludes' is a list of variants which will be used to determine
    what should be allowed by the actions generator in addition to
    what is specified by 'info_needed'.  If not provided, only
    'info_needed' will determine what actions are returned.

    'info_needed' is a set of one or more catalog constants
    indicating the types of catalog data that will be returned:

            DEPENDENCY
                    Depend and set Actions for package obsoletion,
                    renaming, variants.

            SUMMARY
                    Any remaining set Actions not listed above, such
                    as pkg.summary, pkg.description, etc.

    'locales' is an optional set of locale names for which Actions
    should be returned.  The default is set(('C',)) if not provided.
    """

    assert info_needed

    if locales:
        locales = set(locales)
    else:
        locales = set(("C",))

    entry = self.get_entry(pfmri, info_needed=info_needed,
        locales=locales)
    if entry is None:
        raise api_errors.UnknownCatalogEntry(pfmri.get_fmri())

    if "actions" not in entry:
        if self.__manifest_cb:
            # No cached action data; defer to the callback.
            return self.__gen_lazy_actions(pfmri, info_needed,
                locales, excludes)
        return EmptyI
    return self.__gen_actions(entry["actions"], excludes)
def get_entry_all_variants(self, pfmri):
    """A generator function that yields tuples of the format
    (var_name, variants); where var_name is the name of the
    variant and variants is the value of the matching set Action
    for that name.

    Raises UnknownCatalogEntry (once iteration starts) if the FMRI
    is not in the catalog."""

    info_needed = [self.DEPENDENCY]
    entry = self.get_entry(pfmri, info_needed=info_needed)
    if entry is None:
        raise api_errors.UnknownCatalogEntry(pfmri.get_fmri())

    if "actions" in entry:
        actions = self.__gen_actions(entry["actions"])
    elif self.__manifest_cb:
        actions = self.__gen_lazy_actions(pfmri, info_needed)
    else:
        # No action data and no way to load it.
        return

    for act in actions:
        if act.name != "set":
            continue
        vname = act.attrs["name"]
        if vname.startswith("variant"):
            yield vname, act.attrs["value"]
def get_entry_signatures(self, pfmri):
    """Returns a generator that yields tuples of the form (sig,
    value) where 'sig' is the name of the signature, and 'value' is
    the raw catalog value for the signature.  Please note that the
    data type of 'value' is dependent on the signature, so it may
    be a string, list, dict, etc.

    Raises UnknownCatalogEntry if the FMRI is not in the catalog."""

    entry = self.get_entry(pfmri)
    if entry is None:
        raise api_errors.UnknownCatalogEntry(pfmri.get_fmri())

    # Only the leading prefix is removed; splitting on the prefix
    # text would truncate a signature name that happens to contain
    # "signature-" a second time.
    prefix = "signature-"
    plen = len(prefix)
    return (
        (k[plen:], v)
        for k, v in entry.iteritems()
        if k.startswith(prefix)
    )
def get_entry_variants(self, pfmri, name):
    """Returns the variant values recorded for the specified
    variant name in the catalog entry of the given FMRI.  If no
    variants exist for the specified name, None will be returned."""

    for var_name, values in self.get_entry_all_variants(pfmri):
        if var_name != name:
            continue
        # A package can only have one set of values for a single
        # variant name, so the first match is the answer.
        return values
    return None
def get_part(self, name, must_exist=False):
    """Returns the CatalogPart object for the named catalog part.

    'must_exist' is an optional boolean value that indicates that
    the catalog part must already exist in-memory or on-disk; if
    it does not, a value of None will be returned."""

    # A part that has already been cached is returned as-is.
    cached = self.__parts.get(name, None)
    if cached is not None:
        return cached

    if not self.meta_root and must_exist:
        # Nothing on disk to load from.
        return None

    # When the caller requires the part to exist, it must already
    # be listed in the catalog attributes to be valid.
    known = self._attrs.parts
    if must_exist and name not in known:
        return None

    # Not cached yet, so create an object for the part.
    part = CatalogPart(name, meta_root=self.meta_root,
        ordered=not self.__batch_mode, sign=self.__sign)

    if must_exist and self.meta_root and not part.exists:
        # Double-check for the client case where a part is known
        # to the catalog but has purposefully not been retrieved
        # (think locale specific data).
        return None

    self.__parts[name] = part
    if name not in known:
        # Record the brand new part in the catalog attributes.
        known[name] = {
            "last-modified": part.last_modified
        }
    return part
def get_updates_needed(self, path):
    """Returns the catalog files needed to update the existing
    catalog parts, based on the contents of the catalog.attrs
    file in the directory indicated by 'path'.

    A value of None will be returned if the catalog has not been
    modified, while an empty list will be returned if no catalog
    parts need to be updated, but the catalog itself has changed.
    Otherwise the result is either a set of catalog part names
    (when an incremental update is not possible) or a sorted list
    of update log names.

    Raises BadCatalogUpdateIdentity if the catalog at 'path' was
    created at a different time or no longer offers a part this
    catalog has, and ObsoleteCatalogUpdate if a part at 'path' is
    older than the one already held."""

    new_attrs = CatalogAttrs(meta_root=path)
    if not new_attrs.exists:
        # No updates needed (not even to attrs).
        return None

    old_attrs = self._attrs
    if old_attrs.created != new_attrs.created:
        # It's very likely that the catalog has been recreated
        # or this is a completely different catalog than was
        # expected.  In either case, an update isn't possible.
        raise api_errors.BadCatalogUpdateIdentity(path)

    if new_attrs.last_modified == old_attrs.last_modified:
        # No updates needed (not even to attrs).
        return None

    # Pass 1: every part the client has must still be offered.
    # Changed parts are grouped by locale in 'parts'.
    parts = {}
    incremental = True
    for name in old_attrs.parts:
        if name not in new_attrs.parts:
            raise api_errors.BadCatalogUpdateIdentity(path)

        old_lm = old_attrs.parts[name]["last-modified"]
        new_lm = new_attrs.parts[name]["last-modified"]
        if new_lm == old_lm:
            # Part hasn't changed.
            continue
        if new_lm < old_lm:
            raise api_errors.ObsoleteCatalogUpdate(path)

        # The last component of the part name is the locale.
        locale = name.split(".", 2)[2]

        # An incremental update is only safe if the update log
        # covering the last time this part changed is still
        # offered; otherwise updates may be missing.
        logname = "update.%s.%s" % (
            datetime_to_update_ts(old_lm), locale)
        if logname not in new_attrs.updates:
            incremental = False

        parts.setdefault(locale, set()).add(name)

    # XXX in future, add current locale to this.  For now, just
    # ensure that all of the locales of parts that were changed
    # and exist on-disk are included.
    locales = set(("C",))
    locales.update(set(parts.keys()))

    # Pass 2: look for new parts in those locales that this
    # version of the API knows how to use and that the client
    # doesn't already have.
    for name in new_attrs.parts:
        if name in parts or name in old_attrs.parts:
            continue

        # The last component of the name is the locale.
        locale = name.split(".", 2)[2]
        if locale not in locales:
            continue

        # Currently, only these parts are used by the client,
        # so only they need to be retrieved.
        usable = name == self.__BASE_PART or \
            name == self.__DEPS_PART or \
            name.startswith(self.__SUMM_PART_PFX)
        if not usable:
            continue

        # A new part for the current locale rules out an
        # incremental update, since update logs can only be
        # applied to parts that already exist.
        incremental = False
        parts.setdefault(locale, set()).add(name)

    if not parts:
        # No updates needed to catalog parts on-disk, but
        # catalog has changed.
        return []

    if not incremental:
        # Since an incremental update cannot be performed,
        # just return the updated parts for retrieval.
        full = set()
        for locale in parts:
            full.update(parts[locale])
        return full

    # Finally, determine the update logs needed based on the
    # catalog parts that need updating on a per-locale basis.
    updates = set()
    for locale in parts:
        # Find the newest last-modified time among the client's
        # copies of the parts for this locale; it decides which
        # update logs are needed.
        last_lm = None
        for name in parts[locale]:
            if name not in old_attrs.parts:
                continue
            lm = old_attrs.parts[name]["last-modified"]
            if not last_lm or lm > last_lm:
                last_lm = lm

        for name, uattrs in new_attrs.updates.iteritems():
            up_lm = uattrs["last-modified"]

            # The last component of the update name is the
            # locale.
            up_locale = name.split(".", 2)[2]
            if up_locale != locale:
                # This log applies to a different locale.
                continue
            if up_lm <= last_lm:
                # Older or same as newest catalog part for
                # this locale; so skip.
                continue

            # This log changed after the newest catalog part
            # for this locale, so it is needed.
            updates.add(name)

    # Ensure updates are in chronological ascending order.
    return sorted(updates)
def names(self, pubs=EmptyI):
    """Returns a set containing the names of all the packages in
    the Catalog.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is not None:
        return base.names(pubs=pubs)

    # Catalog contains nothing.
    return set()
@property
def package_count(self):
    """The number of unique packages in the catalog."""

    # The count is maintained on the catalog attributes object.
    attrs = self._attrs
    return attrs.package_count
@property
def package_version_count(self):
    """The number of unique package versions in the catalog."""

    # The count is maintained on the catalog attributes object.
    attrs = self._attrs
    return attrs.package_version_count
@property
def parts(self):
    """A dict containing the list of CatalogParts that the catalog
    is composed of along with information about each part."""

    # This is the live dict held by the catalog attributes object,
    # not a copy.
    attrs = self._attrs
    return attrs.parts
def pkg_names(self, pubs=EmptyI):
    """Returns an iterator that produces package tuples of the form
    (pub, stem) as it walks the contents of the catalog.

    'pubs' is an optional list that contains the prefixes of the
    publishers to restrict the results to."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is not None:
        return base.pkg_names(pubs=pubs)

    # Catalog contains nothing; hand back an empty iterator so
    # that callers expecting a generator still work.
    return iter(())
def publishers(self):
    """Returns a set containing the prefixes of all the publishers
    in the Catalog."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is not None:
        # Copy whatever the base part iterates over into a set.
        return set(base.publishers())

    # Catalog contains nothing.
    return set()
def remove_package(self, pfmri):
    """Remove a package and its metadata from every catalog part
    that contains it.

    Raises UnknownCatalogEntry if the package is not present in
    the base catalog part.  Must not be used on a read-only
    catalog."""

    assert not self.read_only

    self.__lock_catalog()
    try:
        # Records removed from each part, kept for the update
        # log when update logging is enabled.
        removed = {}

        # Use the same operation time and date for all
        # operations so that the last modification times
        # of all catalog parts and update logs will be
        # synchronized.
        op_time = datetime.datetime.utcnow()

        # The package has to be removed from every known part.
        for name in self._attrs.parts:
            part = self.get_part(name)
            if part is None:
                continue

            pkg_entry = part.get_entry(pfmri)
            if pkg_entry is not None:
                part.remove(pfmri, op_time=op_time)
                if self.log_updates:
                    removed[part.name] = pkg_entry
            elif name == self.__BASE_PART:
                # Entry should exist in at least the base
                # part.
                raise api_errors.UnknownCatalogEntry(
                    pfmri.get_fmri())
            # Otherwise the package's presence is optional in
            # this part, so there is nothing to do.

        self.__log_update(pfmri, CatalogUpdate.REMOVE, op_time,
            entries=removed)
    finally:
        self.__unlock_catalog()
def save(self):
    """Finalize current state and save to file if possible."""

    # The catalog lock is held for the duration of the save and is
    # always released, even if saving raises.
    self.__lock_catalog()
    try:
        self.__save()
    finally:
        self.__unlock_catalog()
@property
def signatures(self):
    """Returns a dict of the files the catalog is composed of along
    with the last known signatures of each if they are available.

    The result maps each file name (the attrs file, every catalog
    part and every update log) to a dict of the form
    {<signature name>: <value>}; files without signature data map
    to an empty dict."""

    attrs = self._attrs
    sigs = {
        attrs.name: attrs.signatures
    }

    # Signature data is stored under keys of the form
    # 'signature-<name>'.  Only keys that start with that prefix
    # qualify; a key that merely contains the text elsewhere is
    # not a signature entry.
    prefix = "signature-"
    plen = len(prefix)
    for items in (attrs.parts, attrs.updates):
        for name in items:
            entry = sigs[name] = {}
            for k in items[name]:
                if k.startswith(prefix):
                    entry[k[plen:]] = items[name][k]
    return sigs
def tuples(self, last=False, ordered=False, pubs=EmptyI):
    """Returns an iterator that produces FMRI tuples as it walks
    the contents of the catalog.

    'last' is a boolean value that indicates only the last FMRI
    tuple for each package on a per-publisher basis should be
    returned.  As long as the catalog has been saved since the
    last modifying operation, or finalize() has been called,
    this will also be the newest version of the package.

    'ordered' is an optional boolean value that indicates that
    results should be sorted by stem and then by publisher and
    be in descending version order.  If False, results will be
    in ascending version order on a per-publisher, per-stem
    basis.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is not None:
        return base.tuples(last=last, ordered=ordered, pubs=pubs)

    # Catalog contains nothing; hand back an empty iterator so
    # that callers expecting a generator still work.
    return iter(())
def tuple_entries(self, info_needed=EmptyI, last=False, locales=None,
    ordered=False, pubs=EmptyI):
    """Returns an iterator that produces tuples of the format
    ((pub, stem, version), metadata) as it walks the contents of
    the catalog, where 'metadata' is a dict containing the
    requested information.

    'metadata' always contains the following information at a
    minimum:

            BASE
                    'metadata' will be populated with Manifest
                    signature data, if available, using key-value
                    pairs of the form 'signature-<name>': value.

    'info_needed' is an optional list of one or more catalog
    constants indicating the types of catalog data that will
    be returned in 'metadata' in addition to the above:

            DEPENDENCY
                    'metadata' will contain depend and set Actions
                    for package obsoletion, renaming, variants,
                    and facets stored in a list under the
                    key 'actions'.

            SUMMARY
                    'metadata' will contain any remaining Actions
                    not listed above, such as pkg.summary,
                    pkg.description, etc. in a list under the key
                    'actions'.

    'last' is a boolean value that indicates only the last entry
    for each package on a per-publisher basis should be returned.
    As long as the catalog has been saved since the last modifying
    operation, or finalize() has been called, this will also be
    the newest version of the package.

    'locales' is an optional set of locale names for which Actions
    should be returned.  The default is set(('C',)) if not provided.
    Note that unlike actions(), catalog entries will not lazy-load
    action data if it is missing from the catalog.

    'ordered' is an optional boolean value that indicates that
    results should be sorted by stem and then by publisher and
    be in descending version order.  If False, results will be
    in ascending version order on a per-publisher, per-stem
    basis.

    'pubs' is an optional list of publisher prefixes to restrict
    the results to."""

    # Same traversal as the other entry iterators, keyed by FMRI
    # tuple rather than by FMRI object.
    results = self.__entries(info_needed=info_needed,
        locales=locales, last_version=last, ordered=ordered,
        pubs=pubs, tuples=True)
    return results
@property
def updates(self):
    """A dict containing the list of known updates for the catalog
    along with information about each update."""

    # This is the live dict held by the catalog attributes object,
    # not a copy.
    attrs = self._attrs
    return attrs.updates
def update_entry(self, pfmri, metadata):
    """Updates the metadata stored in a package's BASE catalog
    record for the specified FMRI.  Cannot be used when read_only
    or log_updates is enabled; should never be used with a Catalog
    intended for incremental update usage.

    Raises UnknownCatalogEntry if the catalog has no base part or
    the FMRI is not in it.

    'pfmri' is the FMRI of the package to update the entry for.

    'metadata' must be a dict of additional metadata to store with
    the package's BASE record, or None to remove any metadata
    currently stored."""

    assert not self.log_updates and not self.read_only

    base = self.get_part(self.__BASE_PART, must_exist=True)
    if base is None:
        raise api_errors.UnknownCatalogEntry(pfmri.get_fmri())

    # get_entry returns the actual catalog entry, so changing it
    # in place is all that is needed to update the catalog.
    entry = base.get_entry(pfmri)
    if entry is None:
        raise api_errors.UnknownCatalogEntry(pfmri.get_fmri())

    if metadata is None:
        # NOTE(review): removal returns before the last-modified
        # times below are refreshed -- confirm this is intended.
        if "metadata" in entry:
            del entry["metadata"]
        return

    entry["metadata"] = metadata

    # Stamp the catalog, its record of the base part, and the base
    # part itself with one shared modification time.
    op_time = datetime.datetime.utcnow()
    attrs = self._attrs
    attrs.last_modified = op_time
    attrs.parts[base.name] = {
        "last-modified": op_time
    }
    base.last_modified = op_time
def validate(self):
    """Verifies whether the signatures for the contents of the
    catalog match the current signature data.  Raises the
    exception named 'BadCatalogSignatures' on failure."""

    attrs = self._attrs
    attrs.validate()

    # Check every catalog part that is available; parts that
    # do not exist need no validation.
    for name in attrs.parts:
        part = self.get_part(name)
        if part is not None:
            part.validate()

    # Likewise for the update logs, which are loaded without
    # being cached.
    for name in attrs.updates:
        ulog = self.__get_update(name, cache=False)
        if ulog is not None:
            ulog.validate()
# Public read/write properties, each built from a private getter/setter
# pair of the class (the accessors themselves are defined outside of
# this section of the file).
batch_mode = property(__get_batch_mode, __set_batch_mode)
last_modified = property(__get_last_modified, __set_last_modified,
doc="A UTC datetime object indicating the last time the catalog "
"was modified.")
meta_root = property(__get_meta_root, __set_meta_root)
sign = property(__get_sign, __set_sign)
version = property(__get_version, __set_version)
# Methods used by external callers
def verify(filename):
    """Convert the catalog file named by filename into the correct
    type of catalog object and then call its validate method to
    ensure that its contents are self-consistent.

    Files whose name starts with "catalog" are treated as the
    catalog attributes file (when the name ends with "attrs") or as
    a catalog part; files whose name starts with "update" are
    treated as update logs.  Any other name raises
    UnrecognizedCatalogPart."""

    path, fn = os.path.split(filename)

    if fn.startswith("catalog"):
        if fn.endswith("attrs"):
            catobj = CatalogAttrs(meta_root=path)
        else:
            catobj = CatalogPart(fn, meta_root=path)
    elif fn.startswith("update"):
        catobj = CatalogUpdate(fn, meta_root=path)
    else:
        # Unrecognized.
        raise api_errors.UnrecognizedCatalogPart(fn)

    # Every branch above either built an object or raised.
    assert catobj
    catobj.validate()
# Methods used by Catalog classes.
def datetime_to_ts(dt):
    """Take datetime object dt, and convert it to a timestamp
    string in ISO-8601 format."""

    # isoformat() already produces the extended ISO-8601 form.
    ts = dt.isoformat()
    return ts
def datetime_to_basic_ts(dt):
    """Take datetime object dt, and convert it to a timestamp
    string in ISO-8601 basic format (for example
    '20100102T030405.000006Z').

    A datetime without a timezone name is assumed to be UTC and
    gets a trailing 'Z'; otherwise the UTC offset produced by
    isoformat() is kept in its compact form (e.g. '-0500')."""

    date_part, time_part = dt.isoformat().split("T", 1)

    # Dashes are only separators within the date; removing them
    # from the whole string would also strip the sign of a
    # negative UTC offset at the end of the time portion.
    val = "%sT%s" % (date_part.replace("-", ""),
        time_part.replace(":", ""))

    if not dt.tzname():
        # Assume UTC.
        val += "Z"
    return val
def datetime_to_update_ts(dt):
    """Take datetime object dt, and convert it to a timestamp
    string in ISO-8601 basic partial format, i.e. truncated to the
    hour (for example '20100102T03Z').

    A datetime without a timezone name is assumed to be UTC and
    gets a trailing 'Z'; otherwise the UTC offset produced by
    isoformat() is kept in its compact form (e.g. '-0500')."""

    date_part, time_part = dt.isoformat().split("T", 1)

    # Keep only the hour; the minutes, seconds and any fraction
    # are dropped.  Slicing (rather than splitting on ':') is
    # required because a UTC offset adds further colons.
    val = "%sT%s" % (date_part.replace("-", ""), time_part[:2])

    # Carry over a UTC offset, if isoformat() emitted one.
    for sign in ("+", "-"):
        pos = time_part.find(sign)
        if pos != -1:
            val += time_part[pos:].replace(":", "")
            break

    if not dt.tzname():
        # Assume UTC.
        val += "Z"
    return val
def now_to_basic_ts():
    """Returns the current UTC time as a timestamp string in
    ISO-8601 basic format."""

    now = datetime.datetime.utcnow()
    return datetime_to_basic_ts(now)
def now_to_update_ts():
    """Returns the current UTC time as a timestamp string in
    ISO-8601 basic partial format."""

    now = datetime.datetime.utcnow()
    return datetime_to_update_ts(now)
def ts_to_datetime(ts):
    """Take timestamp ts in ISO-8601 format (as in
    'YYYY-MM-DDTHH:MM:SS[.ffffff]'), and convert it to a datetime
    object."""

    # Character ranges of year, month, day, hour, minutes and
    # seconds within the timestamp.
    spans = ((0, 4), (5, 7), (8, 10), (11, 13), (14, 16), (17, 19))
    fields = [int(ts[start:end]) for start, end in spans]

    # usec is not in the string if 0
    try:
        fields.append(int(ts[20:26]))
    except ValueError:
        fields.append(0)
    return datetime.datetime(*fields)
def basic_ts_to_datetime(ts):
    """Take timestamp ts in ISO-8601 basic format (as in
    'YYYYMMDDTHHMMSS[.ffffff]'), and convert it to a datetime
    object.

    Anything following the seconds that is not a fractional part
    (such as a 'Z' or a UTC offset) is ignored; the returned
    datetime object carries no timezone information."""

    year = int(ts[0:4])
    month = int(ts[4:6])
    day = int(ts[6:8])
    hour = int(ts[9:11])
    minutes = int(ts[11:13])
    sec = int(ts[13:15])

    # usec is not in the string if 0.  Only treat what follows the
    # seconds as microseconds when it is introduced by the decimal
    # point; otherwise the digits of a trailing UTC offset (e.g.
    # '+0530') would be misread as a microsecond value.
    usec = 0
    if ts[15:16] == ".":
        try:
            usec = int(ts[16:22])
        except ValueError:
            usec = 0
    return datetime.datetime(year, month, day, hour, minutes, sec, usec)
def update_ts_to_datetime(ts):
    """Take timestamp ts in ISO-8601 basic partial format (as in
    'YYYYMMDDTHH'), and convert it to a datetime object."""

    # Character ranges of year, month, day and hour; position 8
    # holds the date/time separator and is skipped.
    spans = ((0, 4), (4, 6), (6, 8), (9, 11))
    year, month, day, hour = [int(ts[start:end]) for start, end in spans]
    return datetime.datetime(year, month, day, hour)
def extract_matching_fmris(pkgs, patterns=None, matcher=None,
    constraint=None, counthash=None, reverse=True, versions=None):
    """Iterate through the given list of PkgFmri objects,
    looking for packages matching 'pattern' in 'patterns', based on the
    function in 'matcher' and the versioning constraint described by
    'constraint'.  If 'matcher' is None, uses fmri subset matching
    as the default.  If 'patterns' is None, 'versions' may be specified,
    and looks for packages matching the patterns specified in 'versions'.
    When using 'versions', the 'constraint' parameter is ignored.

    Returns a tuple of (matches, unmatched), where 'matches' is the
    sorted list of matching entries of 'pkgs' and 'unmatched' is either
    None (everything requested matched something, or no filter was
    given) or a dict that maps a match type ('publisher', 'matcher',
    'version') to the list of patterns that never passed that stage.

    'versions' should be a list of strings of the format:
        release,build_release-branch:datetime

    ...with a value of '*' provided for any component to be ignored. '*' or
    '?' may be used within each component value and will act as wildcard
    characters ('*' for one or more characters, '?' for a single character).
    The list passed by the caller is not modified.

    'reverse' is an optional boolean value indicating whether results
    should be in descending name and version order.  If false, results
    will be in ascending name, descending version order.

    If 'counthash' is a dictionary, the number of matched fmris for each
    pattern (or version string) that matches is also stored in it."""

    if not matcher:
        matcher = fmri.fmri_match

    if patterns is None:
        patterns = []
    elif not isinstance(patterns, list):
        patterns = [patterns]

    if versions is None:
        versions = []
    elif not isinstance(versions, list):
        versions = [pkg.version.MatchingVersion(versions, None)]
    else:
        # Build a new list instead of converting the entries in
        # place, so that the caller's list is left untouched.
        versions = [
            pkg.version.MatchingVersion(ver, None)
            for ver in versions
        ]

    # 'pattern' may be a partially or fully decorated fmri; we want
    # to extract its name and version to match separately against
    # the catalog.
    tuples = {}

    # Tracks which patterns passed each matching stage.
    if patterns:
        matched = {
            "matcher": set(),
            "publisher": set(),
            "version": set(),
        }
    elif versions:
        matched = {
            "version": set(),
        }

    for pattern in patterns:
        if isinstance(pattern, fmri.PkgFmri):
            tuples[pattern] = pattern.tuple()
        else:
            assert pattern is not None
            # XXX "5.11" here needs to be saner
            tuples[pattern] = \
                fmri.PkgFmri(pattern, "5.11").tuple()

    def by_pattern(p):
        # Returns 'p' if it matches at least one pattern, recording
        # how far each pattern got in 'matched' along the way.
        cat_pub, cat_name = p.tuple()[:2]
        pat_match = False
        for pattern in patterns:
            pat_pub, pat_name, pat_version = tuples[pattern]

            if not pat_pub or fmri.is_same_publisher(pat_pub,
                cat_pub):
                matched["publisher"].add(pattern)
            else:
                continue

            if matcher(cat_name, pat_name):
                matched["matcher"].add(pattern)
            else:
                continue

            if not pat_version or (p.version.is_successor(
                pat_version, constraint) or \
                p.version == pat_version):
                matched["version"].add(pattern)
            else:
                continue

            if counthash is not None:
                counthash.setdefault(pattern, 0)
                counthash[pattern] += 1
            pat_match = True

        if pat_match:
            return p

    def by_version(p):
        # Returns 'p' if its version matches at least one of the
        # requested versions.
        pat_match = False
        for ver in versions:
            if p.version == ver:
                matched["version"].add(ver)
                if counthash is not None:
                    sver = str(ver)
                    if sver in counthash:
                        counthash[sver] += 1
                    else:
                        counthash[sver] = 1
                pat_match = True

        if pat_match:
            return p

    ret = []
    if patterns:
        # Start out assuming nothing matched at any stage.
        unmatched = copy.deepcopy(matched)
        for pattern in patterns:
            for k in unmatched:
                unmatched[k].add(pattern)

        for p in pkgs:
            res = by_pattern(p)
            if res:
                ret.append(res)
    elif versions:
        unmatched = copy.deepcopy(matched)
        for ver in versions:
            for k in unmatched:
                unmatched[k].add(ver)

        for p in pkgs:
            res = by_version(p)
            if res:
                ret.append(res)
    else:
        # No patterns and no versions means that no filtering can be
        # applied.  It seems silly to call this function in that case,
        # but the caller will get what it asked for...
        ret = list(pkgs)

    if patterns or versions:
        # Iterate over a snapshot of the keys, since entries are
        # deleted from the dict inside the loop.
        for k in list(unmatched.keys()):
            # The transformation back to list is important as the
            # unmatched results will likely be used to raise an
            # InventoryException which expects lists.
            unmatched[k] = list(unmatched[k] - matched[k])
            if not unmatched[k]:
                del unmatched[k]
                continue
        if not unmatched:
            unmatched = None
    else:
        unmatched = None

    if not reverse:
        def order(a, b):
            # Ascending name, then descending version, then
            # ascending publisher.
            res = cmp(a.pkg_name, b.pkg_name)
            if res != 0:
                return res
            res = cmp(a.version, b.version) * -1
            if res != 0:
                return res
            return cmp(a.publisher, b.publisher)
        ret.sort(cmp=order)
    else:
        ret.sort(reverse=True)

    return ret, unmatched