--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/util/apache2/sysrepo/sysrepo_p5p.py Wed May 23 09:49:43 2012 +1200
@@ -0,0 +1,452 @@
+#!/usr/bin/python2.6
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+import pkg.p5p
+
+import httplib
+import os
+import shutil
+import simplejson
+import sys
+import threading
+import traceback
+
+# Redirect stdout to stderr for proper WSGI portability: everything this
+# module prints is diagnostic output.
+sys.stdout = sys.stderr
+
+
+def _status_line(code):
+    """Return the "<code> <reason phrase>" status string for the HTTP
+    status 'code', using the reason phrases known to httplib."""
+    return "%s %s" % (code, httplib.responses[code])
+
+
+# The status lines handed to start_response() by this application.
+SERVER_OK_STATUS = _status_line(httplib.OK)
+SERVER_ERROR_STATUS = _status_line(httplib.INTERNAL_SERVER_ERROR)
+SERVER_NOTFOUND_STATUS = _status_line(httplib.NOT_FOUND)
+SERVER_BADREQUEST_STATUS = _status_line(httplib.BAD_REQUEST)
+
+# The headers sent with every response, whatever its status.
+response_headers = [("content-type", "application/binary")]
+
+# Cached p5p archive indices, keyed by the path of the p5p archive.
+p5p_indices = {}
+
+# A lock to prevent two threads from rebuilding our catalog parts cache
+# at the same time.
+p5p_update_lock = threading.Lock()
+
+class UnknownPathException(Exception):
+    """An exception thrown when a client requests a path within a p5p file
+    which does not exist.
+
+    'path' is the offending path; it is kept on the instance as
+    self.path and reported by __str__()."""
+
+    def __init__(self, path):
+        # Pass the argument on to Exception as well, so that e.args and
+        # repr(e) carry it.
+        Exception.__init__(self, path)
+        self.path = path
+
+    def __str__(self):
+        # Wrap the operand in a tuple: a bare tuple-valued path would
+        # otherwise be consumed as several format arguments.
+        return "Unknown path: %s" % (self.path,)
+
+
+class MalformedQueryException(Exception):
+    """Raised when this wsgi application is unable to parse the query
+    sent by the client.
+
+    'query' is the query string as received and 'reason' a short
+    description of what is wrong with it; both are kept on the
+    instance."""
+
+    def __init__(self, query, reason):
+        self.reason = reason
+        self.query = query
+
+    def __str__(self):
+        details = (self.query, self.reason)
+        return "Malformed query %s: %s" % details
+
+
+class MissingArchiveException(Exception):
+    """An exception thrown when the p5p file referred to by the
+    configuration does not exist.
+
+    'path' is the path of the missing archive; it is kept on the
+    instance as self.path and reported by __str__()."""
+
+    def __init__(self, path):
+        # Pass the argument on to Exception as well, so that e.args and
+        # repr(e) carry it.
+        Exception.__init__(self, path)
+        self.path = path
+
+    def __str__(self):
+        # "(self.path)" is not a tuple; use a real one so that a
+        # tuple-valued path cannot be consumed as several format
+        # arguments.
+        return "Missing p5p archive: %s" % (self.path,)
+
+
+class SysrepoP5p(object):
+    """An object to handle a request for p5p file contents from the
+    system repository.
+
+    One instance handles one request: execute() parses the query string,
+    opens the p5p archive that the query's hash maps to and returns the
+    response body; close() releases the archive afterwards."""
+
+    # the characters accepted in the client-supplied archive hash
+    _HEX_DIGITS = "0123456789abcdefABCDEF"
+
+    def __init__(self, environ, start_response):
+        """'environ' is the WSGI environment, which must contain
+        "QUERY_STRING" and "SYSREPO_RUNTIME_DIR" (KeyError is raised
+        otherwise).  'start_response' is the WSGI start_response
+        callable."""
+        self.environ = environ
+        self.start_response = start_response
+        # the path of, and the open handle on, the p5p archive being
+        # served; both are set by execute()
+        self.p5p_path = None
+        self.p5p = None
+
+        self.query = self.environ["QUERY_STRING"]
+        self.runtime_dir = self.environ["SYSREPO_RUNTIME_DIR"]
+
+    def close(self):
+        """Release any resources we have used."""
+        if self.p5p:
+            self.p5p.close()
+
+    def log_exception(self, status=SERVER_ERROR_STATUS):
+        """Print some information in the Apache log that will help
+        determine what went wrong as well as updating the client
+        response code.  The WSGI spec says we can call
+        start_response multiple times, but must include exc_info
+        if we do so.
+
+        Must be called while an exception is being handled, since it
+        reports sys.exc_info().  'status' is the status line to send."""
+
+        # we only want error_log output if our status is not 4xx
+        if status not in (SERVER_NOTFOUND_STATUS,
+            SERVER_BADREQUEST_STATUS):
+            print traceback.format_exc()
+        self.start_response(status, response_headers, sys.exc_info())
+
+    def _cache_dir(self, pub, hsh):
+        """Return the directory beneath "htdocs" in our runtime directory
+        that holds the files cached for publisher 'pub' of the archive
+        identified by 'hsh'."""
+        htdocs_path = os.path.join(self.runtime_dir, "htdocs")
+        return "%s/%s/%s" % (htdocs_path, pub, hsh)
+
+    def need_update(self, pub, hsh):
+        """Determine if we need to update our cached catalog and
+        reload the index by comparing the last modification time of a
+        file we create per p5p archive, and the p5p archive itself.
+
+        Returns True if the cache must be rebuilt, False otherwise;
+        always True when "PKG5_TEST_ENV" is "True" in the environment.
+        Raises MissingArchiveException if the p5p archive does not
+        exist.  Any other failure is logged, sets a server error status
+        and results in False."""
+
+        timestamp_path = "%s/sysrepo.timestamp" % (
+            self._cache_dir(pub, hsh),)
+        update = False
+
+        # Locking here is quite basic: we want to ensure that no two
+        # threads simultaneously decide that they need to rebuild our
+        # local catalog cache, stepping on each others toes.  It is
+        # possible that while processing a single query, a user will
+        # replace the p5p file on the server after this method has been
+        # called, causing stale data to be returned at best, and a HTTP
+        # 500 response at worst (as the p5p index used by this web
+        # application will not match the one in the new archive)
+        with p5p_update_lock:
+            try:
+                # don't write a timestamp if we're testing
+                if self.environ.get("PKG5_TEST_ENV") == "True":
+                    return True
+
+                try:
+                    st_p5p = os.stat(self.p5p_path)
+                except OSError as e:
+                    if e.errno == os.errno.ENOENT:
+                        raise MissingArchiveException(self.p5p_path)
+                    # Report the real failure rather than carrying on
+                    # without st_p5p and tripping over a NameError.
+                    raise
+
+                try:
+                    st_ts = os.stat(timestamp_path)
+                except OSError as e:
+                    # No timestamp yet means the cache was never built.
+                    # Other stat failures are ignored, leaving the
+                    # cache as it is.
+                    stale = e.errno == os.errno.ENOENT
+                else:
+                    stale = st_ts.st_mtime < st_p5p.st_mtime
+
+                if stale:
+                    # (re)create the timestamp, so that its mtime
+                    # records this rebuild
+                    open(timestamp_path, "wb").close()
+                    update = True
+            except MissingArchiveException:
+                raise
+            except Exception:
+                self.log_exception()
+        return update
+
+    def _file_response(self, path, pub):
+        """Process our file query.
+
+        Returns whatever the archive's get_package_file() returns for
+        the file named by the last component of 'path', or None (after
+        setting an error status) if it could not be retrieved."""
+
+        # use the basename of the path, which is the pkg(5) hash
+        self.start_response(SERVER_OK_STATUS, response_headers)
+        try:
+            return self.p5p.get_package_file(os.path.basename(path),
+                pub=pub)
+        except pkg.p5p.UnknownArchiveFiles:
+            self.log_exception(status=SERVER_NOTFOUND_STATUS)
+        except Exception:
+            self.log_exception()
+
+    def _catalog_response(self, path, pub, hsh):
+        """Process our catalog query.
+
+        Returns an open file on the cached copy of the catalog part named
+        by the last component of 'path', extracting it from the archive
+        first if it is not cached.  Returns None (after setting an error
+        status) if the part could not be produced."""
+
+        cat_part = os.path.basename(path)
+        cat_path = "%s/catalog/1/%s" % (self._cache_dir(pub, hsh),
+            cat_part)
+        self.start_response(SERVER_OK_STATUS, response_headers)
+        if os.path.exists(cat_path):
+            return open(cat_path, "rb")
+
+        # this is unlikely to happen: it implies a catalog part has been
+        # requested that wasn't listed in the catalog.attrs file
+        # extracted during _precache_catalog() or the file has been
+        # removed on the server.  Do our best to return the content.
+        cat_dir = os.path.dirname(cat_path)
+        with p5p_update_lock:
+            try:
+                if not os.path.exists(cat_dir):
+                    os.makedirs(cat_dir, 0755)
+                self.p5p.extract_catalog1(cat_part, cat_dir, pub=pub)
+                return open(cat_path, "rb")
+            except (pkg.p5p.UnknownArchiveFiles, IOError):
+                self.log_exception(status=SERVER_NOTFOUND_STATUS)
+            except Exception:
+                # anything else, an OSError from makedirs() included,
+                # is a server error
+                self.log_exception()
+
+    def _manifest_response(self, path, pub):
+        """Return our manifest_response.
+
+        The package name is what follows "manifest/0/" in 'path'.
+        Returns what the archive's get_package_manifest() returns for
+        it with raw=True, or None (after setting an error status) if
+        the manifest could not be retrieved."""
+
+        # strip the leading "manifest/0/" only, not any later occurrence
+        pkg_name = path.replace("manifest/0/", "", 1)
+        fmri = "pkg://%s/%s" % (pub, pkg_name)
+        self.start_response(SERVER_OK_STATUS, response_headers)
+        try:
+            return self.p5p.get_package_manifest(fmri, raw=True)
+        except pkg.p5p.UnknownPackageManifest:
+            self.log_exception(status=SERVER_NOTFOUND_STATUS)
+        # NOTE(review): this file imports only pkg.p5p; presumably that
+        # import makes pkg.fmri available as well -- confirm.
+        except pkg.fmri.IllegalFmri:
+            self.log_exception(status=SERVER_NOTFOUND_STATUS)
+        except Exception:
+            self.log_exception()
+
+    def _precache_catalog(self, pub, hsh):
+        """Extract the catalog parts for publisher 'pub' from the archive
+        into our cache directory for 'pub' and 'hsh', replacing whatever
+        was cached there before."""
+
+        cat_dir = "%s/catalog/1" % (self._cache_dir(pub, hsh),)
+
+        if os.path.exists(cat_dir):
+            shutil.rmtree(cat_dir)
+
+        os.makedirs(cat_dir)
+        try:
+            self.p5p.extract_catalog1("catalog.attrs", cat_dir, pub=pub)
+            # catalog.attrs lists the names of the other catalog parts
+            with open(os.path.join(cat_dir, "catalog.attrs"),
+                "rb") as catalog_attrs:
+                attrs = simplejson.load(catalog_attrs)
+            for part in attrs["parts"]:
+                self.p5p.extract_catalog1(part, cat_dir, pub=pub)
+        except pkg.p5p.UnknownArchiveFiles:
+            # if the catalog part is unavailable,
+            # we ignore this for now. It will be
+            # reported later anyway.
+            pass
+
+    def _parse_query(self):
+        """Parse our query, returning publisher, hash, and path
+        values.
+
+        Raises MalformedQueryException if the query is not made up of
+        key=value pairs, if any of the three values is missing, if the
+        hash is not a known one, or if the publisher could not be used
+        as a single directory name."""
+
+        attrs = {}
+        for keyval in self.query.split("&"):
+            key, sep, val = keyval.partition("=")
+            if not sep:
+                raise MalformedQueryException(self.query,
+                    "missing key=value pair for %s." % (keyval,))
+            attrs[key] = val
+
+        pub = attrs.get("pub")
+        hsh = attrs.get("hash")
+        path = attrs.get("path")
+
+        if not hsh:
+            raise MalformedQueryException(self.query,
+                "missing hash.")
+        # The hash comes straight from the client and is used as a key
+        # into the WSGI environment, which also holds the server's own
+        # variables.  Accept only something shaped like the hex digest
+        # that execute() documents, so that none of those variables can
+        # be selected as the archive path.
+        if not all(c in self._HEX_DIGITS for c in hsh) or \
+            hsh not in self.environ:
+            raise MalformedQueryException(self.query,
+                "unknown hash %s." % (hsh,))
+        if not pub:
+            raise MalformedQueryException(self.query,
+                "missing publisher.")
+        # The publisher becomes a directory name beneath htdocs, one
+        # that _precache_catalog() removes files from: don't let it
+        # point anywhere else.
+        if "/" in pub or pub in (".", ".."):
+            raise MalformedQueryException(self.query,
+                "invalid publisher %s." % (pub,))
+        if not path:
+            raise MalformedQueryException(self.query,
+                "missing path.")
+        return pub, hsh, path
+
+    def execute(self):
+        """Process a query of the form:
+
+            pub=<publisher>&hash=<hash>&path=<path>
+
+        where:
+            <publisher>    the name of the publisher from the p5p file
+            <hash>         the sha1 hash of the location of the p5p file
+            <path>         the path of the pkg(5) client request
+
+        In the environment of this WSGI application, apart from the
+        default WSGI values, defined in PEP333, we expect:
+
+        "SYSREPO_RUNTIME_DIR", a location pointing to the runtime
+        directory, allowing us to serve static html from beneath a
+        "htdocs" subdir.
+
+        <hash>, which maps the sha1 hash of the p5p archive path, to the
+        path itself, which is not visible to clients.
+
+        Returns the response body: whatever the handler for <path>
+        produced (None if that handler failed), or an empty list if
+        the request failed before a handler was reached.  Failures are
+        reported to the client through start_response() rather than
+        raised."""
+
+        buf = []
+        try:
+            pub, hsh, path = self._parse_query()
+            self.p5p_path = self.environ[hsh]
+            # In order to keep only one copy of the p5p index in
+            # memory, we cache it locally, and reuse it any time
+            # we're opening the same p5p file.  Before doing
+            # so, we need to ensure the p5p file hasn't been
+            # modified since we last looked at it.
+            if self.need_update(pub, hsh) or \
+                self.p5p_path not in p5p_indices:
+                with p5p_update_lock:
+                    self.p5p = pkg.p5p.Archive(self.p5p_path)
+                    p5p_indices[self.p5p_path] = self.p5p.get_index()
+                    self._precache_catalog(pub, hsh)
+            else:
+                self.p5p = pkg.p5p.Archive(self.p5p_path,
+                    archive_index=p5p_indices[self.p5p_path])
+
+            if path.startswith("file"):
+                buf = self._file_response(path, pub)
+            elif path.startswith("catalog/1/"):
+                buf = self._catalog_response(path, pub, hsh)
+            elif path.startswith("manifest/0"):
+                buf = self._manifest_response(path, pub)
+            else:
+                raise UnknownPathException(path)
+        except OSError as e:
+            if e.errno == os.errno.ENOENT:
+                self.log_exception(status=SERVER_NOTFOUND_STATUS)
+            else:
+                # any other OS-level failure is a server error; it
+                # must still produce a response status
+                self.log_exception()
+        except UnknownPathException:
+            self.log_exception(status=SERVER_NOTFOUND_STATUS)
+        except MalformedQueryException:
+            self.log_exception(status=SERVER_BADREQUEST_STATUS)
+        except Exception:
+            # includes MissingArchiveException: the archive we were
+            # configured with is gone, which is a server-side problem
+            self.log_exception()
+        return buf
+
+
+#
+# CloseGenerator, AppWrapper and _application as an idiom together
+# are described at
+# http://code.google.com/p/modwsgi/wiki/RegisteringCleanupCode
+# and exist to ensure that we close any server-side resources used by
+# our application at the end of the request (i.e. after the client has
+# received it)
+#
+
+def _application(environ, start_response):
+    """Serve one request: build a SysrepoP5p for it, run its query and
+    return a (response body, handler) pair, so that the caller is able to
+    close() the handler once the body has been delivered."""
+    handler = SysrepoP5p(environ, start_response)
+    return handler.execute(), handler
+
+
+class CloseGenerator(object):
+    """Wraps the iterable returned from the mod_wsgi application so that
+    it is guaranteed to have a close() method, see PEP333.
+
+    'iterable' is the response body; 'closeable' is an object whose
+    close() must be called when the response is finished with."""
+
+    def __init__(self, iterable, closeable):
+        self.__closeable = closeable
+        self.__iterable = iterable
+
+    def __iter__(self):
+        # A false value here (None, or something empty) most likely
+        # means an exception prevented an iterable from being
+        # produced: yield nothing at all in that case.
+        if self.__iterable:
+            for chunk in self.__iterable:
+                yield chunk
+
+    def close(self):
+        """Close the wrapped iterable, if it can be closed, and then the
+        closeable; the closeable is closed even when closing the
+        iterable fails."""
+        wrapped = self.__iterable
+        try:
+            if hasattr(wrapped, "close"):
+                wrapped.close()
+        finally:
+            self.__closeable.close()
+
+
+class AppWrapper(object):
+    """Adapts a callable application so that what it returns is handed
+    to CloseGenerator whenever the wrapper itself is called.
+
+    The wrapped 'application' is called with (environ, start_response)
+    and must return a (result, closeable) pair."""
+
+    def __init__(self, application):
+        self.__application = application
+
+    def __call__(self, environ, start_response):
+        outcome = self.__application(environ, start_response)
+        result, closeable = outcome
+        return CloseGenerator(result, closeable)
+
+
+# The WSGI callable of this module: _application, wrapped so that the
+# result of every call is a CloseGenerator and so has a close() method.
+application = AppWrapper(_application)
+
+if __name__ == "__main__":
+    # A simple main function to allow us to test any given query/env:
+    #
+    #     sysrepo_p5p <query> <hash>=<path to p5p file>
+    #
+    # The query is run with the current directory (taken from $PWD) as
+    # the runtime directory and with PKG5_TEST_ENV set, and the response
+    # is printed.
+    import urllib
+
+    def start_response(status, response_headers, exc_info=None):
+        """A dummy response function: print the status and, if the
+        application supplied exc_info, the traceback it describes."""
+        print "responding with %s" % status
+        if exc_info:
+            # traceback.format_exc() takes a limit, not an exc_info
+            # tuple; format the exception we were given instead
+            print "".join(traceback.format_exception(*exc_info))
+
+    if len(sys.argv) != 3:
+        query = \
+            ("'pub=test&hash=de5acae11333890c457665379eec812a67f78dd3"
+            "&path=manifest/0/[email protected]%2C5.11-1%3A20110617T204846Z'")
+        alias = \
+            "de5acae11333890c457665379eec812a67f78dd3=/tmp/archive.p5p"
+        print "usage: sysrepo_p5p <query> <hash>=<path to p5p file>"
+        print "eg: ./sysrepo_p5p.py %s %s" % (query, alias)
+        sys.exit(2)
+
+    environ = {}
+
+    # unquote the url, so that we can easily copy/paste entries from
+    # Apache logs when testing.
+    environ["QUERY_STRING"] = urllib.unquote(sys.argv[1])
+    environ["SYSREPO_RUNTIME_DIR"] = os.environ["PWD"]
+    environ["PKG5_TEST_ENV"] = "True"
+    # split on the first "=" only: the archive path may contain one too
+    hsh, path = sys.argv[2].split("=", 1)
+    environ[hsh] = path
+
+    result = application(environ, start_response)
+    try:
+        for response in result:
+            if isinstance(response, basestring):
+                print response.rstrip()
+            elif response:
+                for line in response.readlines():
+                    print line.rstrip()
+    finally:
+        # release the archive and the response body, as a WSGI server
+        # would at the end of the request
+        result.close()