Implement bundled file downloads using filelist
author johansen <johansen@sun.com>
Thu, 27 Sep 2007 15:40:34 -0700
changeset 119 537d69114be4
parent 118 00efa131edda
child 120 6fa207e1e668
Implement bundled file downloads using filelist
src/Makefile
src/client.py
src/depot.py
src/modules/actions/file.py
src/modules/actions/generic.py
src/modules/client/filelist.py
src/modules/client/pkgplan.py
src/modules/misc.py
src/modules/server/config.py
--- a/src/Makefile	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/Makefile	Thu Sep 27 15:40:34 2007 -0700
@@ -104,6 +104,7 @@
 
 PYCLIENTMODS = \
 	modules/client/__init__.py \
+	modules/client/filelist.py \
 	modules/client/filter.py \
 	modules/client/image.py \
 	modules/client/imageconfig.py \
--- a/src/client.py	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/client.py	Thu Sep 27 15:40:34 2007 -0700
@@ -63,6 +63,7 @@
 
 import pkg.client.image as image
 import pkg.client.imageplan as imageplan
+import pkg.client.filelist as filelist
 
 def usage():
         print _("""\
@@ -126,7 +127,7 @@
         error = 0
 
         if len(args) > 0:
-                opts, pargs = getopt.getopt(args, "Snvf:")
+                opts, pargs = getopt.getopt(args, "Snvb:f:")
 
         strict = noexecute = verbose = False
         filters = []
@@ -137,6 +138,8 @@
                         noexecute = True
                 elif opt == "-v":
                         verbose = True
+                elif opt == "-b":
+                        filelist.FileList.maxfiles = int(arg)
                 elif opt == "-f":
                         filters += [ arg ]
 
--- a/src/depot.py	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/depot.py	Thu Sep 27 15:40:34 2007 -0700
@@ -52,6 +52,8 @@
 import sys
 import time
 import urllib
+import tarfile
+import cgi
 
 import pkg.catalog as catalog
 import pkg.dependency as dependency
@@ -100,6 +102,38 @@
         request.end_headers()
         request.wfile.write(data)
 
+def file_get_multiple(scfg, request):
+        """Serve the files named in a POST request as one tar stream.  The
+        body holds application/x-www-form-urlencoded entries naming them."""
+        hdrs = request.headers
+        # If the sender doesn't specify the content length, reject this request.
+        # Calling read() with no size specified will force the server to block
+        # until the client sends EOF, an undesirable situation.
+        size = int(hdrs.getheader("Content-Length") or 0)
+        if size <= 0:
+                request.send_response(411)
+                return
+
+        rfile = request.rfile
+        data_dict = cgi.parse_qs(rfile.read(size))
+        scfg.inc_flist()
+
+        request.send_response(200)
+        request.send_header("Content-type", "application/data")
+        request.end_headers()
+
+        # Requested names are untrusted: only serve paths that stay inside
+        # the file root once normalized.
+        root = os.path.normpath(scfg.file_root) + os.sep
+        tar_stream = tarfile.open(mode = "w|", fileobj = request.wfile)
+        for v in data_dict.values():
+                filepath = os.path.normpath(os.path.join(
+                    scfg.file_root, misc.hash_file_name(v[0])))
+                if filepath.startswith(root):
+                        tar_stream.add(filepath, v[0], False)
+                        scfg.inc_flist_files()
+        tar_stream.close()
+
 def file_get_single(scfg, request):
         """The request is the SHA-1 hash name for the file."""
         scfg.inc_file()
@@ -108,7 +142,8 @@
         fhash = m.group(1)
 
         try:
-                file = open(scfg.file_root + "/" + misc.hash_file_name(fhash))
+                file = open(os.path.normpath(os.path.join(
+                    scfg.file_root, misc.hash_file_name(fhash))))
         except IOError, e:
                 if e.errno == errno.ENOENT:
                         request.send_response(404)
@@ -209,7 +244,9 @@
                     (self.path, self.headers))
 
         def do_POST(self):
-                if re.match("^/add/(.*)$", self.path):
+                if re.match("^/filelist/.*$", self.path):
+                        file_get_multiple(scfg, self)
+                elif re.match("^/add/(.*)$", self.path):
                         trans_add(scfg, self)
                 else:
                         self.send_response(404)
--- a/src/modules/actions/file.py	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/modules/actions/file.py	Thu Sep 27 15:40:34 2007 -0700
@@ -69,11 +69,7 @@
                 # For ELF files, only write the new file if the elfhash changed.
                 # XXX This needs to be modularized.
                 # XXX This needs to be controlled by policy.
-                bothelf = orig and "elfhash" in orig.attrs and "elfhash" in self.attrs
-                if not orig or \
-                    (bothelf and orig.attrs["elfhash"] !=
-                        self.attrs["elfhash"]) or \
-                    (not bothelf and orig.hash != self.hash):
+                if self.needsdata(orig): 
                         temp = os.path.normpath(os.path.sep.join(
                             (image.get_root(), path + "." + self.hash)))
 
@@ -102,6 +98,17 @@
                 # This is safe even if temp == final_path.
                 os.rename(temp, final_path)
 
+        def needsdata(self, orig):
+                """Returns True if installing over orig requires new file
+                data: a fresh install, changed ELF content, or a new hash."""
+                if not orig:
+                        return True
+                if "elfhash" in orig.attrs and "elfhash" in self.attrs:
+                        return orig.attrs["elfhash"] != self.attrs["elfhash"]
+
+                return orig.hash != self.hash
+
+
         def remove(self, image):
                 path = os.path.normpath(os.path.sep.join(
                     (image.get_root(), self.attrs["path"])))
--- a/src/modules/actions/generic.py	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/modules/actions/generic.py	Thu Sep 27 15:40:34 2007 -0700
@@ -297,6 +297,11 @@
 
                 os.mkdir(path, leafmode)
 
+        def needsdata(self, orig):
+                """Returns True if the transition from orig to this action
+                requires a datastream.  This default always answers False."""
+                return False
+
         def preinstall(self, image, orig):
                 """Client-side method that performs pre-install actions."""
                 pass
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/client/filelist.py	Thu Sep 27 15:40:34 2007 -0700
@@ -0,0 +1,226 @@
+#!/usr/bin/python
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+import sys
+import os
+import exceptions
+import httplib
+import urllib
+import urllib2
+import urlparse
+import tarfile
+import shutil
+import errno
+
+import pkg.client.image
+import pkg.actions as actions
+import pkg.actions.generic as generic
+import pkg.fmri as fmri
+
+class FileList(object):
+        """A FileList maintains mappings between files and Actions.
+        The list is built with knowledge of the Image and the PackagePlan's
+        associated actions.
+
+        The FileList is responsible for downloading the files needed by the
+        PkgPlan from the repository. Once downloaded, the FileList generates
+        the appropriate opener for the actions that it processed.  By
+        downloading files in a group, it is possible to achieve better
+        performance.  This is because the FileList asks for the files to be
+        sent in groups, instead of individual HTTP GET's.
+
+        The caller may limit the maximum number of entries in a FileList
+        by specifying maxents when the object is constructed.  If the caller
+        sets maxents to 0, the size of the list is assumed to be infinite.
+        If maxents is omitted, the class attribute maxfiles is used."""
+
+        maxfiles = 64
+
+        def __init__(self, image, fmri, maxents=None):
+                """Create a FileList object for the specified image and
+                package FMRI.  maxents bounds the number of distinct file
+                hashes; None selects maxfiles and 0 means no limit."""
+
+                self.image = image
+                self.fmri = fmri
+                self.maxents = maxents
+                if self.maxents is None:
+                        self.maxents = self.maxfiles
+                # Maps a file hash to the list of actions waiting on it.
+                self.fhash = { }
+
+        def add_action(self, action):
+                """Add the specified action to the filelist.  The action
+                must name a file that can be retrieved from the repository.
+
+                Raises FileListException if the action has no hash
+                attribute or if the list is already full."""
+
+                if not hasattr(action, "hash"):
+                        raise FileListException, "Invalid action type"
+                if self.is_full():
+                        raise FileListException, "FileList full"
+
+                # Each fhash key accesses a list of one or more actions:
+                # actions with the same hash share a single entry.
+                self.fhash.setdefault(action.hash, [ ]).append(action)
+
+        def get_files(self):
+                """Instruct the FileList object to download the files
+                for the actions that have been associated with this object,
+                and give each action an opener for its own temporary copy.
+
+                This routine will raise a FileListException if the server
+                does not support filelist, or if the response contains a
+                file that no action is waiting for.  Callers of get_files
+                should consider catching this exception."""
+
+                # Nothing to ask for; an empty POST would only be rejected.
+                if not self.fhash:
+                        return
+
+                authority, _name, _version = self.fmri.tuple()
+                url_prefix = self.image.get_url_by_authority(authority)
+                url_fpath = "%s/filelist/" % url_prefix
+
+                req_dict = { }
+                for i, k in enumerate(self.fhash):
+                        req_dict["File-Name-%s" % i] = k
+
+                req_str = urllib.urlencode(req_dict)
+                req = urllib2.Request(url = url_fpath, data = req_str)
+
+                try:
+                        f = urllib2.urlopen(req)
+                except urllib2.HTTPError, e:
+                        if int(e.code) >= 400:
+                                raise FileListException, \
+                                    "No server-side support"
+                        else:
+                                raise
+
+                # Release the tar stream and the HTTP response even when
+                # extraction fails part way through.
+                try:
+                        tar_stream = tarfile.open(mode = "r|", fileobj = f)
+                        try:
+                                for info in tar_stream:
+                                        self._place_member(tar_stream, info)
+                        finally:
+                                tar_stream.close()
+                finally:
+                        f.close()
+
+        def _place_member(self, tar_stream, info):
+                """Extract one member of the downloaded tar stream and
+                assign an opener to every action waiting on its hash."""
+
+                hashval = info.name
+
+                # The member name is chosen by the server and is used to
+                # build paths below, so only accept a hash that was
+                # requested and still has an action waiting on it.
+                l = self.fhash.get(hashval)
+                if not l:
+                        raise FileListException, \
+                            "Unexpected file in server response"
+
+                pkgnm = self.fmri.pkg_name
+                imgroot = self.image.get_root()
+                act = l.pop()
+
+                # get directory and basename
+                dirname, base = os.path.split(act.attrs["path"])
+                # reconstruct path without basename
+                path = os.path.normpath(os.path.join(imgroot, dirname))
+
+                # XXX catch IOError if tar stream closes inadvertently?
+                tar_stream.extract(info, path)
+
+                # extract path is where the file now lives
+                # after the extract
+                extract_path = os.path.normpath(os.path.join(
+                    path, info.name))
+
+                # Since the file hash value identifies the content, and
+                # not the file or package itself, generate temporary
+                # file names that are unique by package and file name.
+                # This ensures that each opener gets access to a unique
+                # file name that hasn't been manipulated by another
+                # action.
+                mvpath = os.path.normpath(os.path.join(
+                    path, "." + pkgnm + "-" + base + "-" + hashval))
+
+                os.rename(extract_path, mvpath)
+
+                # assign opener
+                act.data = self._make_opener(mvpath)
+
+                # If there are more actions in the list, copy the
+                # extracted file to their paths, changing names as
+                # appropriate to maintain uniqueness
+                for action in l:
+                        dirname, base = os.path.split(action.attrs["path"])
+                        cpdir = os.path.normpath(os.path.join(
+                            imgroot, dirname))
+                        cppath = os.path.normpath(os.path.join(
+                            cpdir, "." + pkgnm + "-" + base + "-" + hashval))
+                        if not os.path.exists(cpdir):
+                                os.makedirs(cpdir)
+                        shutil.copy(mvpath, cppath)
+                        action.data = self._make_opener(cppath)
+
+        def is_full(self):
+                """Returns true if the FileList object has filled its
+                allocated slots and can no longer accept new actions."""
+
+                return self.maxents > 0 and len(self.fhash) >= self.maxents
+
+        @staticmethod
+        def _make_opener(filepath):
+                """Return a callable that opens filepath for reading and
+                unlinks it right away; the caller reads the data through
+                the file object that the callable returns."""
+                def opener():
+                        f = open(filepath, "rb")
+                        os.unlink(filepath)
+                        return f
+                return opener
+
+
+class FileListException(exceptions.Exception):
+        """Raised when an action cannot be added to a FileList, or when
+        a bundled download cannot be completed."""
+
+        def __init__(self, args=None):
+                # Let the base class store the message so that args is
+                # always a tuple; assigning a bare string or None to it
+                # misbehaves where exceptions are new-style classes.
+                if args is None:
+                        exceptions.Exception.__init__(self)
+                else:
+                        exceptions.Exception.__init__(self, args)
--- a/src/modules/client/pkgplan.py	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/modules/client/pkgplan.py	Thu Sep 27 15:40:34 2007 -0700
@@ -27,6 +27,7 @@
 import os
 
 import pkg.manifest as manifest
+import pkg.client.filelist as filelist
 
 class PkgPlan(object):
         """A package plan takes two package FMRIs and an Image, and produces the
@@ -145,6 +146,9 @@
                 methods, as well as any package-wide steps that need to be taken
                 at such a time.
                 """
+                flist = None
+                flist_supported = True
+
                 # retrieval step
                 if self.destination_fmri == None:
                         os.unlink("%s/pkg/%s/installed" % (self.image.imgdir,
@@ -159,6 +163,34 @@
                         else:
                                 src.preremove(self.image)
 
+                        if dest and dest.needsdata(src) and flist_supported:
+
+                                if flist and flist.is_full():
+                                        try:
+                                                flist.get_files()
+                                        except filelist.FileListException:
+                                                flist_supported = False
+                                                flist = None
+                                                continue
+
+                                        flist = None
+
+                                if flist is None:
+                                        flist = filelist.FileList(
+                                                    self.image,
+                                                    self.destination_fmri)
+
+                                flist.add_action(dest)
+
+
+                # Get any remaining files
+                if flist:
+                        try:
+                                flist.get_files()
+                        except filelist.FileListException:
+                                pass
+                        flist = None
+
         def execute(self):
                 """Perform actions for installation or removal of a package.
                 
--- a/src/modules/misc.py	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/modules/misc.py	Thu Sep 27 15:40:34 2007 -0700
@@ -23,12 +23,13 @@
 #
 # Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
-#
+
+import os
 
 def hash_file_name(f):
         """Return the two-level path fragment for the given filename, which is
         assumed to be a content hash of at least 8 distinct characters."""
-        return "%s/%s/%s" % (f[0:2], f[2:8], f)
+        return os.path.join("%s" % f[0:2], "%s" % f[2:8], "%s" % f)
 
 if __name__ == "__main__":
         print hash_file_name("abcdefghijklmnopqrstuvwxyz")
--- a/src/modules/server/config.py	Thu Sep 27 13:43:48 2007 -0700
+++ b/src/modules/server/config.py	Thu Sep 27 15:40:34 2007 -0700
@@ -60,6 +60,8 @@
                 self.catalog_requests = 0
                 self.manifest_requests = 0
                 self.file_requests = 0
+                self.flist_requests = 0
+                self.flist_files = 0
 
         def init_dirs(self):
                 # XXX refine try/except
@@ -114,8 +116,11 @@
 Number of catalogs served: %d
 Number of manifests served: %d
 Number of files served: %d
+Number of flists requested: %d
+Number of files served by flist: %d
 """ % (len(self.catalog.pkgs), len(self.in_flight_trans), self.catalog_requests,
-                self.manifest_requests, self.file_requests)
+                self.manifest_requests, self.file_requests,
+                self.flist_requests, self.flist_files)
 
                 return ret
 
@@ -128,3 +133,9 @@
         def inc_file(self):
                 self.file_requests += 1
 
+        def inc_flist(self):
+                self.flist_requests += 1
+
+        def inc_flist_files(self):
+                self.flist_files += 1
+