983 pkg search returns just one action per package/token-type combo
1949 pkg search is inordinately slow
2122 catalog file update needs to be atomic with respect to both other updates and reads
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/search.txt Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,74 @@
+
+pkg
+SEARCH
+
+1. Goals
+
+ i. Provide relevant information
+ ii. Provide a consistently fast response
+ iii. Make responses consistent between local and remote search
+ iv. Provide the user with a good interface to the information
+ v. Allow seamless recovery when search fails
+ vi. Ensure the index is (almost) always in a consistent state
+
+2. Approach
+
+    At a high level, there are two components to search: the indexer,
+    which maintains the information needed for search, and the query
+    engine, which actually searches that information. The indexer is
+    responsible for creating and updating the indexes and ensuring
+    they're always in a consistent state. It does this by maintaining
+    a set of inverted indexes as text files (details of which can be
+    found in the comments at the top of indexer.py). On the server
+    side, it's hooked into the publishing code so that the index is
+    updated each time a package is published. If indexing is already
+    in progress when packages are published, they're queued and
+    another index update happens once the current run is finished. On
+    the client side, it's hooked into the install, image-update, and
+    uninstall code so that each of those operations is reflected in
+    the index.
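+
+    As a concrete example, the client-side hook (mirroring
+    imageplan.py in this changeset) checks the index before executing
+    an image plan and performs an incremental update afterwards:
+
+        ind = indexer.Indexer(image.index_dir,
+            CLIENT_DEFAULT_MEM_USE_KB, progtrack=progtrack)
+        ind.check_index(image.get_fmri_manifest_pairs(),
+            force_rebuild=False)
+        # ... execute the image plan ...
+        ind.client_update_index((filters, plan_info))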
+
+    The query engine is responsible for processing the text from the
+    user, searching for that token in its information, and giving the
+    client code the information needed for a reasonable response to
+    the user. It must ensure that the information it uses is in a
+    consistent state. On the server, an engine is created during
+    server initialization. It reads in the files it needs and stores
+    the data internally. When the server gets a search request from a
+    client, it hands the search token to the query engine. The query
+    engine ensures that it has the most recent information (locking
+    and rereading the files from disk if necessary) and then searches
+    for the token in its dictionaries. On the client, the process is
+    the same except that the indexes are read from disk each time
+    rather than cached in memory, because a new pkg process is started
+    for each search.
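+
+    As a rough illustration, the client-side flow (mirroring
+    local_search in src/modules/client/image.py from this changeset)
+    looks like:
+
+        qe = query_e.ClientQueryEngine(self.index_dir)
+        query = query_e.Query(token)
+        return qe.search(query)
+
+    where token is the search term supplied by the user.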
+
+3. Details
+
+    Search reserves the $ROOT/index directory for its use on both the
+    client and the server. It also creates a TMP directory inside
+    index, in which it stores indexes until it's ready to migrate them
+    to the proper directory.
+
+ indexer.py contains detailed information about the files used to store the
+ index and their formats.
+
+ 3.1 Locking
+
+    The indexes use a version locking protocol. The requirements for
+    the protocol are:
+        i. the writer never blocks on readers
+        ii. any number of readers are allowed
+        iii. readers must always have consistent data regardless of
+             the writer's actions
+    To implement these features, several conventions must be observed.
+    The writer is responsible for updating the index files in another
+    location, then moving them on top of the existing files so that,
+    from a reader's perspective, file updates are always atomic. Each
+    file in the index has a version in its first line. The writer is
+    responsible for ensuring that each time it updates the index, the
+    files all have the same version number and that that version
+    number has not been previously used. The writer is not responsible
+    for moving multiple files atomically, but it should make an effort
+    to have the files in $ROOT/index be out of sync for as short a
+    time as possible.
+
+    The readers are responsible for ensuring that the files they're
+    reading the indexes from are a consistent set (that is, that they
+    have identical version numbers). The consistent_open function in
+    search_storage takes care of this.
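+
+    As a minimal sketch of the reader-side convention (assuming, as in
+    indexer.py, that each index file begins with a "VERSION: N" line;
+    versions_match is a hypothetical helper, not the actual
+    consistent_open implementation):
+
+        def versions_match(file_handles):
+            # Compare the version number on the first line of every
+            # open index file; a mismatch means a writer is mid-update
+            # and the files must be reopened.
+            versions = [int(f.readline().split()[1])
+                for f in file_handles]
+            return len(set(versions)) == 1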
--- a/src/client.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/client.py Fri Jul 25 13:56:38 2008 -0700
@@ -58,6 +58,7 @@
import pkg.client.filelist as filelist
import pkg.client.progress as progress
import pkg.client.bootenv as bootenv
+import pkg.search_errors as search_errors
import pkg.fmri as fmri
import pkg.misc as misc
from pkg.misc import msg, emsg, PipeError
@@ -65,14 +66,14 @@
import pkg
def error(text):
- """ Emit an error message prefixed by the command name """
+ """Emit an error message prefixed by the command name """
# This has to be a constant value as we can't reliably get our actual
# program name on all platforms.
emsg("pkg: " + text)
def usage(usage_error = None):
- """ Emit a usage message and optionally prefix it with a more
+ """Emit a usage message and optionally prefix it with a more
specific error message. Causes program to exit. """
if usage_error:
@@ -104,6 +105,7 @@
[-O origin_url] authority
pkg unset-authority authority ...
pkg authority [-HP] [authname]
+ pkg rebuild-index
Options:
-R dir
@@ -117,6 +119,19 @@
# pkg image-unset name
# pkg image-get [name ...]
+INCONSISTENT_INDEX_ERROR_MESSAGE = "The search index appears corrupted. " + \
+ "Please rebuild the index with 'pkg rebuild-index'."
+
+PROBLEMATIC_PERMISSIONS_ERROR_MESSAGE = " (Failure to use pfexec " + \
+    "consistently when running pkg commands is often the cause of this " + \
+    "problem.)"
+
+def get_partial_indexing_error_message(text):
+ return "Result of partial indexing found.\n" + \
+ "Could not make: " + \
+ text + "\nbecause it already exists. " + \
+ "Please use 'pkg rebuild-index' " + \
+ "to fix this problem."
+
def list_inventory(img, args):
all_known = False
display_headers = True
@@ -139,7 +154,7 @@
verbose = True
if summary and verbose:
- usage(_("-s and -v may not be combined"))
+ usage(_("-s and -v may not be combined"))
if verbose:
fmt_str = "%-64s %-10s %s"
@@ -233,7 +248,7 @@
def installed_fmris_from_args(img, args):
- """ Helper function to translate client command line arguments
+ """Helper function to translate client command line arguments
into a list of installed fmris. Used by info, contents, verify.
XXX consider moving into image class
@@ -416,6 +431,15 @@
error(_("image-update failed: %s") % e)
be.restore_image()
ret_code = 1
+ except search_errors.InconsistentIndexException, e:
+ error(INCONSISTENT_INDEX_ERROR_MESSAGE)
+ ret_code = 1
+ except search_errors.PartialIndexingException, e:
+ error(get_partial_indexing_error_message(e.cause))
+ ret_code = 1
+ except search_errors.ProblematicPermissionsIndexException, e:
+ error(str(e) + PROBLEMATIC_PERMISSIONS_ERROR_MESSAGE)
+ ret_code = 1
except Exception, e:
error(_("\nAn unexpected error happened during " \
"image-update: %s") % e)
@@ -507,6 +531,15 @@
error(_("installation failed: %s") % e)
be.restore_install_uninstall()
ret_code = 1
+ except search_errors.InconsistentIndexException, e:
+ error(INCONSISTENT_INDEX_ERROR_MESSAGE)
+ ret_code = 1
+ except search_errors.PartialIndexingException, e:
+ error(get_partial_indexing_error_message(e.cause))
+ ret_code = 1
+ except search_errors.ProblematicPermissionsIndexException, e:
+ error(str(e) + PROBLEMATIC_PERMISSIONS_ERROR_MESSAGE)
+ ret_code = 1
except Exception, e:
error(_("An unexpected error happened during " \
"installation: %s") % e)
@@ -514,7 +547,7 @@
img.cleanup_downloads()
raise
- img.cleanup_downloads()
+ img.cleanup_downloads()
if ret_code == 0:
img.cleanup_cached_content()
@@ -608,7 +641,7 @@
be = bootenv.BootEnv(img.get_root())
except RuntimeError:
be = bootenv.BootEnvNull(img.get_root())
-
+
try:
ip.execute()
except RuntimeError, e:
@@ -665,7 +698,19 @@
searches = []
if local:
- searches.append(img.local_search(pargs))
+ try:
+ searches.append(img.local_search(pargs))
+			except search_errors.NoIndexException, nie:
+				error(str(nie) +
+				    "\nPlease try 'pkg rebuild-index' to "
+				    "recreate the index.")
+				return 1
+			except search_errors.InconsistentIndexException, iie:
+				error(INCONSISTENT_INDEX_ERROR_MESSAGE)
+				return 1
+
if remote:
searches.append(img.remote_search(pargs, servers))
@@ -797,7 +842,7 @@
if len(pmatch) > 0:
fmris.append(pmatch[0])
else:
- fmris.append(npmatch[0])
+ fmris.append(npmatch[0])
manifests = ( img.get_manifest(f, filtered = True) for f in fmris )
@@ -861,7 +906,7 @@
def display_contents_results(actionlist, attrs, sort_attrs, action_types,
display_headers):
- """ Print results of a "list" operation """
+ """Print results of a "list" operation """
# widths is a list of tuples of column width and justification. Start
# with the widths of the column headers.
@@ -1084,7 +1129,7 @@
if len(pmatch) > 0:
fmris.append(pmatch[0])
else:
- fmris.append(npmatch[0])
+ fmris.append(npmatch[0])
#
# If the user specifies no specific attrs, and no specific
@@ -1415,7 +1460,7 @@
if not misc.valid_auth_url(auth_url):
error(_("image-create: authority URL is invalid"))
- return 1
+ return 1
try:
img.set_attrs(imgtype, pargs[0], is_zone, auth_name, auth_url,
@@ -1435,6 +1480,26 @@
else:
return 0
+
+def rebuild_index(img, pargs):
+ """pkg rebuild-index
+
+ Forcibly rebuild the search indexes. Will remove existing indexes
+ and build new ones from scratch."""
+ quiet = False
+
+ if len(pargs) != 0:
+ usage(_("rebuild-index takes no arguments"))
+
+ try:
+ img.rebuild_search_index(get_tracker(quiet))
+ except search_errors.InconsistentIndexException, iie:
+ error(INCONSISTENT_INDEX_ERROR_MESSAGE)
+ return 1
+ except search_errors.ProblematicPermissionsIndexException, ppie:
+ error(str(ppie) + PROBLEMATIC_PERMISSIONS_ERROR_MESSAGE)
+ return 1
+
def main_func():
img = image.Image()
@@ -1524,6 +1589,8 @@
return authority_unset(img, pargs)
elif subcommand == "authority":
return authority_list(img, pargs)
+ elif subcommand == "rebuild-index":
+ return rebuild_index(img, pargs)
else:
usage(_("unknown subcommand '%s'") % subcommand)
@@ -1549,5 +1616,10 @@
sys.exit(1)
except:
traceback.print_exc()
+ error(
+ "\n\nThis is an internal error, please let the " + \
+ "developers know about this\nproblem by filing " + \
+ "a bug at http://defect.opensolaris.org and including " + \
+ "the\nabove traceback and the output of 'pkg version'.")
sys.exit(99)
sys.exit(ret)
--- a/src/depot.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/depot.py Fri Jul 25 13:56:38 2008 -0700
@@ -61,6 +61,8 @@
READONLY_DEFAULT = False
# Whether the repository catalog should be rebuilt on startup.
REBUILD_DEFAULT = False
+# Whether the search indexes should be rebuilt on startup.
+REINDEX_DEFAULT = False
import getopt
import os
@@ -96,7 +98,7 @@
sys.exit(2)
class OptionError(Exception):
- """ Option exception. """
+ """Option exception. """
def __init__(self, *args):
Exception.__init__(self, *args)
@@ -108,6 +110,7 @@
socket_timeout = SOCKET_TIMEOUT_DEFAULT
readonly = READONLY_DEFAULT
rebuild = REBUILD_DEFAULT
+ reindex = REINDEX_DEFAULT
if "PKG_REPO" in os.environ:
repo_path = os.environ["PKG_REPO"]
@@ -117,7 +120,7 @@
try:
parsed = set()
opts, pargs = getopt.getopt(sys.argv[1:], "d:np:s:t:",
- ["readonly", "rebuild"])
+ ["readonly", "rebuild", "refresh-index"])
for opt, arg in opts:
if opt in parsed:
raise OptionError, "Each option may only be " \
@@ -145,6 +148,17 @@
readonly = True
elif opt == "--rebuild":
rebuild = True
+ elif opt == "--refresh-index":
+ # Note: This argument is for internal use
+				# only. It's used when pkg.depotd is
+				# re-executing itself and needs to know
+				# that's the case.
+ # This flag is purposefully omitted in usage.
+ # The supported way to forcefully reindex is to
+ # kill any pkg.depot using that directory,
+ # remove the index directory, and restart the
+ # pkg.depot process. The index will be rebuilt
+ # automatically on startup.
+ reindex = True
except getopt.GetoptError, e:
print "pkg.depotd: %s" % e.msg
usage()
@@ -155,12 +169,14 @@
print "pkg.depotd: illegal option value: %s specified " \
"for option: %s" % (arg, opt)
usage()
-
- available, msg = port_available(None, port)
- if not available:
- print "pkg.depotd: unable to bind to the specified port: " \
- " %d. Reason: %s" % (port, msg)
- sys.exit(1)
+ # If the program is going to reindex, the port is irrelevant since
+ # the program will not bind to a port.
+ if not reindex:
+ available, msg = port_available(None, port)
+ if not available:
+ print "pkg.depotd: unable to bind to the specified " \
+ "port: %d. Reason: %s" % (port, msg)
+ sys.exit(1)
try:
face.set_content_root(os.environ["PKG_DEPOT_CONTENT"])
@@ -183,6 +199,11 @@
"structures:\n%s" % e
sys.exit(1)
+ if reindex:
+ scfg.acquire_catalog(rebuild = False)
+ scfg.catalog.run_update_index()
+ sys.exit(0)
+
scfg.acquire_in_flight()
scfg.acquire_catalog()
--- a/src/man/pkg.1.txt Tue Jul 22 19:36:15 2008 -0500
+++ b/src/man/pkg.1.txt Fri Jul 25 13:56:38 2008 -0700
@@ -28,6 +28,8 @@
/usr/bin/pkg unset-authority authority ...
/usr/bin/pkg authority [-HP] [authname ...]
+ /usr/bin/pkg rebuild-index
+
/usr/bin/pkg version
/usr/bin/pkg help
@@ -225,6 +227,10 @@
headers from the listing. With -P, display only the preferred
authority.
+ rebuild-index
+ Rebuilds the index used by 'pkg search'. This is a recovery operation
+ not intended for general use.
+
version
Display a unique string identifying the version of pkg(1). This
string is not guaranteed to be comparable in any fashion between
--- a/src/modules/actions/attribute.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/actions/attribute.py Fri Jul 25 13:56:38 2008 -0700
@@ -32,6 +32,8 @@
possible types are: XXX."""
import generic
+import re
+import pkg.fmri as fmri
class AttributeAction(generic.Action):
"""Class representing a package attribute."""
@@ -52,7 +54,44 @@
else:
assert len(attrs) == 2
assert set(attrs.keys()) == set([ "name", "value" ])
-
- def verify(self, img, **args):
- """ since there's no install method, this class is always installed correctly"""
- return []
+
+ def verify(self, img, **args):
+ """Since there's no install method, this class is always
+ installed correctly."""
+
+ return []
+
+ def generate_indices(self):
+ """Generates the indices needed by the search dictionary."""
+ if self.attrs["name"] == "description" or \
+ " " in self.attrs["value"]:
+ return dict(
+ (w, w)
+ for w in self.attrs["value"].split()
+ )
+ elif self.attrs["name"] == "fmri":
+ fmri_obj = fmri.PkgFmri(self.attrs["value"])
+
+ return {
+ self.attrs["name"]: [
+ fmri_obj.get_pkg_stem(include_pkg=False),
+ str(fmri_obj.version.build_release),
+ str(fmri_obj.version.release),
+ str(fmri_obj.version.timestr)
+ ]
+ }
+ elif isinstance(self.attrs["value"], list):
+ tmp = {}
+ for v in self.attrs["value"]:
+ assert isinstance(v, str)
+ if " " in v:
+ words = v.split()
+ for w in words:
+ tmp[w] = w
+ else:
+ tmp[v] = v
+ return tmp
+ else:
+ return {
+ self.attrs["value"]: self.attrs["value"]
+ }
--- a/src/modules/catalog.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/catalog.py Fri Jul 25 13:56:38 2008 -0700
@@ -29,17 +29,14 @@
import re
import urllib
import errno
-import anydbm as dbm
-import signal
+import datetime
import threading
-import datetime
-import sys
-import cPickle
+import tempfile
import bisect
import pkg.fmri as fmri
import pkg.version as version
-import pkg.manifest as manifest
+import pkg.portable as portable
class CatalogException(Exception):
def __init__(self, args=None):
@@ -96,7 +93,7 @@
# interface.
def __init__(self, cat_root, authority = None, pkg_root = None,
- read_only = False):
+ read_only = False, rebuild = True):
"""Create a catalog. If the path supplied does not exist,
this will create the required directory structure.
Otherwise, if the directories are already in place, the
@@ -106,22 +103,17 @@
is represented by this catalog."""
self.catalog_root = cat_root
+ self.catalog_file = os.path.normpath(os.path.join(
+ self.catalog_root, "catalog"))
self.attrs = {}
self.auth = authority
self.renamed = None
- self.searchdb_update_handle = None
- self.searchdb = None
- self._search_available = False
- self.deferred_searchdb_updates = []
- # We need to lock the search database against multiple
- # simultaneous updates from separate threads closing
- # publication transactions.
- self.searchdb_lock = threading.Lock()
self.pkg_root = pkg_root
self.read_only = read_only
- if self.pkg_root:
- self.searchdb_file = os.path.dirname(self.pkg_root) + \
- "/search"
+
+ # The catalog protects the catalog file from having multiple
+ # threads writing to it at the same time.
+ self.catalog_lock = threading.Lock()
self.attrs["npkgs"] = 0
@@ -129,7 +121,7 @@
os.makedirs(cat_root)
# Rebuild catalog, if we're the depot and it's necessary
- if pkg_root is not None:
+ if pkg_root is not None and rebuild:
self.build_catalog()
self.load_attrs()
@@ -156,21 +148,39 @@
else:
pkgstr = "V %s\n" % fmri.get_fmri(anarchy = True)
- pathstr = os.path.normpath(os.path.join(self.catalog_root,
- "catalog"))
+ pathstr = self.catalog_file
+ tmp_num, tmpfile = tempfile.mkstemp(dir=self.catalog_root)
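+		# Write the updated catalog to a temporary file in the
+		# same directory, then rename() it over the old catalog
+		# below, so that readers always see either the old or
+		# the new file, never a partial write.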
- pfile = file(pathstr, "a+")
+		self.catalog_lock.acquire()
+		try:
+			tfile = os.fdopen(tmp_num, 'w')
+			try:
+				pfile = file(pathstr, "rb")
+			except IOError, e:
+				if e.errno == errno.ENOENT:
+					# Create an empty file.
+					file(pathstr, "wb").close()
+					pfile = file(pathstr, "rb")
+				else:
+					raise
-		pfile.seek(0)
-		for entry in pfile:
-			if entry == pkgstr:
-				pfile.close()
-				raise CatalogException, \
-				    "Package %s is already in the catalog" % \
-				    fmri
+			try:
+				for entry in pfile:
+					if entry == pkgstr:
+						raise CatalogException(
+						    "Package %s is already "
+						    "in the catalog" % fmri)
+					else:
+						tfile.write(entry)
+				tfile.write(pkgstr)
+			finally:
+				pfile.close()
+				tfile.close()
-		pfile.write(pkgstr)
-		pfile.close()
+			portable.rename(tmpfile, pathstr)
+		finally:
+			self.catalog_lock.release()
self.attrs["npkgs"] += 1
@@ -314,13 +324,6 @@
def build_catalog(self):
"""Walk the on-disk package data and build (or rebuild) the
package catalog and search database."""
- try:
- idx_mtime = \
- os.stat(self.searchdb_file + ".pag").st_mtime
- except OSError, e:
- if e.errno != errno.ENOENT:
- raise
- idx_mtime = 0
try:
cat_mtime = os.stat(os.path.join(
@@ -330,8 +333,6 @@
raise
cat_mtime = 0
- fmri_list = []
-
# XXX eschew os.walk in favor of another os.listdir here?
tree = os.walk(self.pkg_root)
for pkg in tree:
@@ -346,314 +347,9 @@
# XXX queue this and fork later?
if ver_mtime > cat_mtime:
f = self._fmri_from_path(pkg[0], e)
-
self.add_fmri(f)
print f
- # XXX force a rebuild despite mtimes?
- # If the database doesn't exist, don't bother
- # building the list; we'll just build it all.
- if ver_mtime > idx_mtime > 0:
- fmri_list.append((pkg[0], e))
-
- # If we have no updates to make to the search database but it
- # already exists, just make it available. If we do have updates
- # to make (including possibly building it from scratch), fork it
- # off into another process; when that's done, we'll mark it
- # available.
- if not fmri_list and idx_mtime > 0:
- try:
- self.searchdb = \
- dbm.open(self.searchdb_file, "w")
-
- self._search_available = True
- except dbm.error, e:
- print >> sys.stderr, \
- "Failed to open search database", \
- "for writing: %s (errno=%s)" % \
- (e.args[1], e.args[0])
- try:
- self.searchdb = \
- dbm.open(self.searchdb_file, "r")
-
- self._search_available = True
- except dbm.error, e:
- print >> sys.stderr, \
- "Failed to open search " + \
- "database: %s (errno=%s)" % \
- (e.args[1], e.args[0])
- else:
- if os.name == 'posix':
- from pkg.subprocess_method import Mopen, PIPE
- try:
- signal.signal(signal.SIGCHLD,
- self.child_handler)
- self.searchdb_update_handle = \
- Mopen(self.update_searchdb,
- [fmri_list], {}, stderr = PIPE)
- except ValueError:
- # If we are in a subthread already,
- # the signal method will not work.
- self.update_searchdb(fmri_list)
- else:
- # On non-unix, where there is no convenient
- # way to fork subprocesses, just update the
- # searchdb inline.
- self.update_searchdb(fmri_list)
-
- def child_handler(self, sig, frame):
- """Handler method for the SIGCLD signal. Checks to see if the
- search database update child has finished, and enables searching
- if it finished successfully, or logs an error if it didn't."""
- if not self.searchdb_update_handle:
- return
-
- rc = self.searchdb_update_handle.poll()
- if rc == 0:
- try:
- self.searchdb = \
- dbm.open(self.searchdb_file, "w")
-
- self._search_available = True
- self.searchdb_update_handle = None
- except dbm.error, e:
- print >> sys.stderr, \
- "Failed to open search database", \
- "for writing: %s (errno=%s)" % \
- (e.args[1], e.args[0])
- try:
- self.searchdb = \
- dbm.open(self.searchdb_file, "r")
-
- self._search_available = True
- self.searchdb_update_handle = None
- return
- except dbm.error, e:
- print >> sys.stderr, \
- "Failed to open search " + \
- "database: %s (errno=%s)" % \
- (e.args[1], e.args[0])
- return
-
- if self.deferred_searchdb_updates:
- self.update_searchdb(
- self.deferred_searchdb_updates)
- elif rc > 0:
- # XXX This should be logged instead
- print "ERROR building search database:"
- print self.searchdb_update_handle.stderr.read()
-
- def __update_searchdb_unlocked(self, fmri_list):
- if fmri_list:
- if self.searchdb is None:
- try:
- self.searchdb = \
- dbm.open(self.searchdb_file, "c")
- except dbm.error, e:
- # Since we're here explicitly to update
- # the database, if we fail, there's
- # nothing more to do.
- print >> sys.stderr, \
- "Failed to open search database", \
- "for writing: %s (errno=%s)" % \
- (e.args[1], e.args[0])
- return 1
-
- if not self.searchdb.has_key("indir_num"):
- self.searchdb["indir_num"] = "0"
- else:
- try:
- self.searchdb = \
- dbm.open(self.searchdb_file, "n")
- except dbm.error, e:
- print >> sys.stderr, \
- "Failed to open search database", \
- "for writing: %s (errno=%s)" % \
- (e.args[1], e.args[0])
- return 1
-
- self.searchdb["indir_num"] = "0"
- # XXX We should probably iterate over the catalog, for
- # cases where manifests have stuck around, but have been
- # moved to historical and removed from the catalog.
- fmri_list = (
- (os.path.join(self.pkg_root, pkg), ver)
- for pkg in os.listdir(self.pkg_root)
- for ver in os.listdir(
- os.path.join(self.pkg_root, pkg))
- )
-
- for pkg, vers in fmri_list:
- mfst_path = os.path.join(pkg, vers)
- mfst = manifest.Manifest()
- mfst_file = file(mfst_path)
- mfst.set_content(mfst_file.read())
- mfst_file.close()
-
- f = self._fmri_from_path(pkg, vers)
-
- self.update_index(f, mfst.search_dict())
-
- def update_searchdb(self, fmri_list):
- """Update the search database with the FMRIs passed in via
- 'fmri_list'. If 'fmri_list' is empty or None, then rebuild the
- database from scratch. 'fmri_list' should be a list of tuples
- where the first element is the full path to the package name in
- pkg_root and the second element is the version string."""
-
- # If we're in the process of updating the database in our
- # separate process, and this particular update until that's
- # done.
- if self.searchdb_update_handle:
- self.deferred_searchdb_updates += fmri_list
- return
-
- self.searchdb_lock.acquire()
-
- try:
- self.__update_searchdb_unlocked(fmri_list)
- finally:
- self.searchdb_lock.release()
-
- # If we rebuilt the database from scratch ... XXX why would we
- # want to do this?
- # if new:
- # self.searchdb.close()
- # self.searchdb = None
- self._search_available = True
-
- # Five digits of a base-62 number represents a little over 900 million.
- # Assuming 1 million tokens used in a WOS build (current imports use
- # just short of 500k, but we don't have all the l10n packages, and may
- # not have all the search tokens we want) and keeping every nightly
- # build gives us 2.5 years before we run out of token space. We're
- # likely to garbage collect manifests and rebuild the db before then.
- #
- # XXX We're eventually going to run into conflicts with real tokens
- # here. This is unlikely until we hit, say "alias", which is a ways
- # off, but we should still look at solving this.
- idx_tok_len = 5
-
- def next_token(self):
- alphabet = "abcdefghijklmnopqrstuvwxyz"
- k = "0123456789" + alphabet + alphabet.upper()
-
- num = int(self.searchdb["indir_num"])
-
- s = ""
- for i in range(1, self.idx_tok_len + 1):
- junk, tail = divmod(num, 62 ** i)
- idx, junk = divmod(tail, 62 ** (i - 1))
- s = k[idx] + s
-
- # XXX Do we want to log warnings as we approach index capacity?
- self.searchdb["indir_num"] = \
- str(int(self.searchdb["indir_num"]) + 1)
-
- return s
-
- def update_index(self, fmri, search_dict):
- """Update the search database with the data from the manifest
- for 'fmri', which has been collected into 'search_dict'"""
- # self.searchdb: token -> (type, fmri, action name, key value)
-
- # Don't update the database if it already has this FMRI's
- # indices.
- if self.searchdb.has_key(str(fmri)):
- return
-
- self.searchdb[str(fmri)] = "True"
- for tok_type in search_dict.keys():
- for tok in search_dict[tok_type]:
- # XXX The database files are so damned huge (if
- # holey) because we have zillions of copies of
- # the full fmri strings. We might want to
- # indirect these as well.
- action, keyval = search_dict[tok_type][tok]
- s = "%s %s %s %s" % \
- (tok_type, fmri, action, keyval)
- s_ptr = self.next_token()
- try:
- self.searchdb[s_ptr] = s
- except:
- print >> sys.stderr, "Couldn't add " \
- "'%s' (s_ptr = %s) to search " \
- "database" % (s, s_ptr)
- continue
-
- self.update_chain(tok, s_ptr)
-
- def update_chain(self, token, data_token):
- """Because of the size limitations of the underlying database
- records, not only do we have to store pointers to the actual
- search data, but once the pointer records fill up, we have to
- chain those records up to spillover records. This method adds
- the pointer to the data to the end of the last link in the
- chain, overflowing as necessary. The search token is passed in
- as 'token', and the pointer to the actual data which should be
- returned is passed in as 'data_token'."""
-
- while True:
- try:
- cur = self.searchdb[token]
- except KeyError:
- cur = ""
- l = len(cur)
-
- # According to the ndbm man page, the total length of
- # key and value must be less than 1024. Seems like the
- # actual value is 1018, probably due to some padding or
- # accounting bytes or something. The 2 is for the space
- # separator and the plus-sign for the extension token.
- # XXX The comparison should be against 1017, but that
- # crahes in the if clause below trying to append the
- # extension token. Dunno why.
- if len(token) + l + self.idx_tok_len + 2 > 1000:
- # If we're adding the first element in the next
- # link of the chain, add the extension token to
- # the end of this link, and put the token
- # pointing to the data at the beginning of the
- # next link.
- if cur[-(self.idx_tok_len + 1)] != "+":
- nindir_tok = "+" + self.next_token()
- self.searchdb[token] += " " + nindir_tok
- self.searchdb[nindir_tok] = data_token
- break # from while True; we're done
- # If we find an extension token, start looking
- # at the next chain link.
- else:
- token = cur[-(self.idx_tok_len + 1):]
- continue
-
- # If we get here, it's safe to append the data token to
- # the current link, and get out.
- if cur:
- self.searchdb[token] += " " + data_token
- else:
- self.searchdb[token] = data_token
- break
-
- def search(self, token):
- """Search through the search database for 'token'. Return a
- list of token type / fmri pairs."""
- ret = []
-
- while True:
- # For each indirect token in the search token's value,
- # add its value to the return list. If we see a chain
- # token, switch to its value and continue. If we fall
- # out of the loop without seeing a chain token, we can
- # return.
- for tok in self.searchdb[token].split():
- if tok[0] == "+":
- token = tok
- break
- else:
- ret.append(
- self.searchdb[tok].split(" ", 1))
- else:
- return ret
-
# XXX Now this is only used by rename_package() and a handful of tests.
def get_matching_fmris(self, patterns):
"""Wrapper for extract_matching_fmris."""
@@ -962,7 +658,7 @@
for d in self.fmri_renamed_dest(fmri):
pkgs.append(d.old_fmri())
ol = self.rename_older_pkgs(d.old_fmri())
- pkgs.extend(ol)
+ pkgs.extend(ol)
return pkgs
@@ -1065,7 +761,7 @@
def timestamp():
"""Return an integer timestamp that can be used for comparisons."""
- tobj = datetime.datetime.now()
+ tobj = datetime.datetime.now()
tstr = tobj.isoformat()
return tstr
--- a/src/modules/client/image.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/client/image.py Fri Jul 25 13:56:38 2008 -0700
@@ -47,9 +47,12 @@
import pkg.client.imageplan as imageplan
import pkg.client.retrieve as retrieve
import pkg.portable as portable
+import pkg.client.query_engine as query_e
+import pkg.indexer as indexer
from pkg.misc import versioned_urlopen
from pkg.misc import TransferTimedOutException
+from pkg.misc import CLIENT_DEFAULT_MEM_USE_KB
from pkg.client.imagetypes import *
img_user_prefix = ".org.opensolaris,pkg"
@@ -141,6 +144,7 @@
self.root = None
self.imgdir = None
self.img_prefix = None
+ self.index_dir = None
self.repo_uris = []
self.filter_tags = {}
self.catalogs = {}
@@ -154,7 +158,7 @@
self.attrs["Policy-Pursue-Latest"] = True
		self.imageplan = None # valid after evaluation succeeds
-
+
		# contains a dictionary w/ key = pkgname, value is minimum
		# fmri. XXX Needs rewrite using graph follower
self.optional_dependencies = {}
@@ -244,7 +248,7 @@
self.img_prefix = img_user_prefix
else:
self.img_prefix = img_root_prefix
- self.imgdir = os.path.join(self.root, self.img_prefix)
+ self.imgdir = os.path.join(self.root, self.img_prefix)
self.mkdirs()
self.cfg_cache = imageconfig.ImageConfig()
@@ -268,8 +272,8 @@
return self.root == "/"
def is_zone(self):
- zone = self.cfg_cache.filters.get("opensolaris.zone", "")
- return zone == "nonglobal"
+ zone = self.cfg_cache.filters.get("opensolaris.zone", "")
+ return zone == "nonglobal"
def get_root(self):
return self.root
@@ -402,7 +406,7 @@
self.cfg_cache.write("%s/cfg_cache" % self.imgdir)
def verify(self, fmri, progresstracker, **args):
- """ generator that returns any errors in installed pkgs
+ """generator that returns any errors in installed pkgs
as tuple of action, list of errors"""
for act in self.get_manifest(fmri, filtered = True).actions:
@@ -415,14 +419,23 @@
yield (actname, errors)
def gen_installed_actions(self):
- """ generates actions in installed image """
+ """generates actions in installed image """
for fmri in self.gen_installed_pkgs():
for act in self.get_manifest(fmri, filtered = True).actions:
yield act
+ def get_fmri_manifest_pairs(self):
+ """For each installed fmri, finds the path to its manifest file
+ and adds the pair of the fmri and the path to a list. Once all
+ installed fmris have been processed, the list is returned."""
+ return [
+ (fmri, self.get_manifest_path(fmri))
+ for fmri in self.gen_installed_pkgs()
+ ]
+
def get_link_actions(self):
- """ return a dictionary of hardlink action lists indexed by
+ """return a dictionary of hardlink action lists indexed by
target """
if self.link_actions != None:
return self.link_actions
@@ -477,7 +490,6 @@
fmri_dir_path = os.path.join(self.imgdir, "pkg",
fmri.get_dir_path())
mpath = os.path.join(fmri_dir_path, "manifest")
- ipath = os.path.join(fmri_dir_path, "index")
# Get manifest as a string from the remote host, then build
# it up into an in-memory manifest, then write the finished
@@ -498,7 +510,7 @@
m["authority"] = fmri.get_authority()
try:
- m.store(mpath, ipath)
+ m.store(mpath)
except EnvironmentError, e:
if e.errno not in (errno.EROFS, errno.EACCES):
raise
@@ -521,6 +533,13 @@
return True
+ def get_manifest_path(self, fmri):
+ """Find on-disk manifest and create in-memory Manifest
+ object, applying appropriate filters as needed."""
+ mpath = os.path.join(self.imgdir, "pkg",
+ fmri.get_dir_path(), "manifest")
+ return mpath
+
def get_manifest(self, fmri, filtered = False):
"""Find on-disk manifest and create in-memory Manifest
object, applying appropriate filters as needed."""
@@ -552,21 +571,21 @@
# we have. Keep the old manifest and drive on.
pass
- # XXX perhaps all of the below should live in Manifest.filter()?
+ # XXX perhaps all of the below should live in Manifest.filter()?
if filtered:
- filters = []
- try:
- f = file("%s/filters" % fmri_dir_path, "r")
- except IOError, e:
- if e.errno != errno.ENOENT:
- raise
- else:
- filters = [
- (l.strip(), compile(
- l.strip(), "<filter string>", "eval"))
- for l in f.readlines()
- ]
- m.filter(filters)
+ filters = []
+ try:
+ f = file("%s/filters" % fmri_dir_path, "r")
+ except IOError, e:
+ if e.errno != errno.ENOENT:
+ raise
+ else:
+ filters = [
+ (l.strip(), compile(
+ l.strip(), "<filter string>", "eval"))
+ for l in f.readlines()
+ ]
+ m.filter(filters)
return m
@@ -1022,7 +1041,8 @@
if auth["prefix"] == self.cfg_cache.preferred_authority:
authpfx = "%s_%s" % (pkg.fmri.PREF_AUTH_PFX,
auth["prefix"])
- c = catalog.Catalog(croot, authority = authpfx)
+ c = catalog.Catalog(croot,
+ authority=authpfx)
else:
c = catalog.Catalog(croot,
authority = auth["prefix"])
@@ -1052,7 +1072,7 @@
# Get the catalog for the correct authority
cat = self.get_catalog(cfmri)
- return cat.rename_is_same_pkg(cfmri, pfmri)
+ return cat.rename_is_same_pkg(cfmri, pfmri)
def fmri_is_successor(self, cfmri, pfmri):
@@ -1197,13 +1217,13 @@
self.update_optional_dependency(min_fmri)
def get_user_by_name(self, name):
- return portable.get_user_by_name(name, self.root,
- self.type != IMG_USER)
+ return portable.get_user_by_name(name, self.root,
+ self.type != IMG_USER)
def get_name_by_uid(self, uid, returnuid = False):
# XXX What to do about IMG_PARTIAL?
try:
- return portable.get_name_by_uid(uid, self.root,
+ return portable.get_name_by_uid(uid, self.root,
self.type != IMG_USER)
except KeyError:
if returnuid:
@@ -1212,12 +1232,12 @@
raise
def get_group_by_name(self, name):
- return portable.get_group_by_name(name, self.root,
- self.type != IMG_USER)
+ return portable.get_group_by_name(name, self.root,
+ self.type != IMG_USER)
def get_name_by_gid(self, gid, returngid = False):
try:
- return portable.get_name_by_gid(gid, self.root,
+ return portable.get_name_by_gid(gid, self.root,
self.type != IMG_USER)
except KeyError:
if returngid:
@@ -1444,54 +1464,20 @@
for f in nplist:
yield f
+ def update_index_dir(self, postfix="index"):
+ """Since the index directory will not reliably be updated when
+ the image root is, this should be called prior to using the
+ index directory.
+ """
+ self.index_dir = os.path.join(self.imgdir, postfix)
+
def local_search(self, args):
"""Search the image for the token in args[0]."""
- idxdir = os.path.join(self.imgdir, "pkg")
-
- # Convert a full directory path to the FMRI it represents.
- def idx_to_fmri(index):
- return pkg.fmri.PkgFmri(urllib.unquote(os.path.dirname(
- index[len(idxdir) + 1:]).replace(os.path.sep, "@")),
- None)
-
- indices = (
- (os.path.join(dir, "index"), os.path.join(dir, "manifest"))
- for dir, dirnames, filenames in os.walk(idxdir)
- if "manifest" in filenames and "installed" in filenames
- )
-
- for index, mfst in indices:
- # Try loading the index; if that fails, try parsing the
- # manifest.
- try:
- d = cPickle.load(file(index))
- except:
- m = manifest.Manifest()
- try:
- mcontent = file(mfst).read()
- except:
- # XXX log something?
- continue
- m.set_content(mcontent)
- try:
- m.pickle(file(index, "wb"))
- except:
- pass
- d = m.search_dict()
-
- for k, v in d.items():
- if args[0] in v:
- # Yield the index name (such as
- # "basename", the fmri, and then
- # the "match results" which
- # include the action name and
- # the value of the key attribute
- try:
- yield k, idx_to_fmri(index), \
- v[args[0]][0], v[args[0]][1]
- except TypeError:
- yield k, idx_to_fmri(index), \
- "", ""
+ assert args[0]
+ self.update_index_dir()
+ qe = query_e.ClientQueryEngine(self.index_dir)
+ query = query_e.Query(args[0])
+ return qe.search(query)
def remote_search(self, args, servers = None):
"""Search for the token in args[0] on the servers in 'servers'.
@@ -1522,9 +1508,9 @@
for l in res.read().splitlines():
fields = l.split()
if len(fields) < 4:
- yield fields[:2] + [ "", "" ]
+ yield fields[:2] + [ "", "" ]
else:
- yield fields[:4]
+ yield fields[:4]
except socket.timeout, e:
failed.append((auth, e))
continue
@@ -1566,7 +1552,7 @@
"""Called when directory contains something and it's not supposed
to because it's being deleted. XXX Need to work out a better error
passback mechanism. Path is rooted in /...."""
-
+
salvagedir = os.path.normpath(
os.path.join(self.imgdir, "lost+found",
path + "-" + time.strftime("%Y%m%dT%H%M%SZ")))
@@ -1580,7 +1566,7 @@
"in %s" % (path, salvagedir))
def expanddirs(self, dirs):
- """ given a set of directories, return expanded set that includes
+ """given a set of directories, return expanded set that includes
all components"""
out = set()
for d in dirs:
@@ -1595,7 +1581,7 @@
verbose = False, noexecute = False):
"""Take a list of packages, specified in pkg_list, and attempt
to assemble an appropriate image plan. This is a helper
- routine for some common operations in the client.
+ routine for some common operations in the client.
This method checks all authorities for a package match;
however, it defaults to choosing the preferred authority
@@ -1656,7 +1642,7 @@
if len(pmatch) > 0:
ip.propose_fmri(pmatch[0])
else:
- ip.propose_fmri(npmatch[0])
+ ip.propose_fmri(npmatch[0])
if error != 0:
raise RuntimeError, "Unable to assemble image plan"
@@ -1666,11 +1652,23 @@
msg(ip)
ip.evaluate()
- self.imageplan = ip
+ self.imageplan = ip
if verbose:
msg(_("After evaluation:"))
msg(ip.display())
+ def rebuild_search_index(self, progtracker):
+ """Rebuilds the search indexes. Removes all
+ existing indexes and replaces them from scratch rather than
+ performing the incremental update which is usually used."""
+ self.update_index_dir()
+ if not os.path.isdir(self.index_dir):
+			self.mkdirs()
+ ind = indexer.Indexer(self.index_dir,
+ CLIENT_DEFAULT_MEM_USE_KB, progtracker)
+ ind.check_index(self.get_fmri_manifest_pairs(),
+ force_rebuild = True)
+
if __name__ == "__main__":
pass
--- a/src/modules/client/imageplan.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/client/imageplan.py Fri Jul 25 13:56:38 2008 -0700
@@ -24,12 +24,13 @@
# Use is subject to license terms.
import pkg.fmri as fmri
-import os.path
import pkg.client.pkgplan as pkgplan
import pkg.client.retrieve as retrieve # XXX inventory??
import pkg.version as version
+import pkg.indexer as indexer
from pkg.client.filter import compile_filter
from pkg.misc import msg
+from pkg.misc import CLIENT_DEFAULT_MEM_USE_KB
UNEVALUATED = 0
EVALUATED_OK = 1
@@ -96,7 +97,7 @@
for pp in self.pkg_plans:
msg("%s -> %s" % (pp.origin_fmri, pp.destination_fmri))
-
+
def is_proposed_fmri(self, fmri):
for pf in self.target_fmris:
if self.image.fmri_is_same_pkg(fmri, pf):
@@ -137,7 +138,7 @@
def older_version_proposed(self, fmri):
# returns true if older version of this fmri has been
- # proposed already
+ # proposed already
for p in self.target_fmris:
if self.image.fmri_is_successor(fmri, p):
return True
@@ -174,7 +175,7 @@
if self.directories == None:
dirs = set(["var/pkg", "var/sadm/install"])
dirs.update(
- [
+ [
d
for fmri in self.gen_new_installed_pkgs()
for act in self.image.get_manifest(fmri, filtered = True).actions
@@ -282,7 +283,7 @@
self.pkg_plans.append(pp)
def evaluate_fmri_removal(self, pfmri):
- # prob. needs breaking up as well
+ # prob. needs breaking up as well
assert self.image.has_manifest(pfmri)
dependents = self.image.get_dependents(pfmri)
@@ -337,7 +338,7 @@
self.progtrack.evaluate_start()
outstring = ""
-
+
# Operate on a copy, as it will be modified in flight.
for f in self.target_fmris[:]:
self.progtrack.evaluate_progress()
@@ -350,8 +351,8 @@
if outstring:
raise RuntimeError("No packages were installed because "
"package dependencies could not be satisfied\n" +
- outstring)
-
+ outstring)
+
for f in self.target_fmris:
self.add_pkg_plan(f)
self.progtrack.evaluate_progress()
@@ -363,7 +364,7 @@
self.progtrack.evaluate_done()
self.state = EVALUATED_OK
-
+
def nothingtodo(self):
""" Test whether this image plan contains any work to do """
@@ -381,6 +382,17 @@
self.state = PREEXECUTED_OK
return
+		# Check the index to make sure it exists and is
+		# consistent. If it's inconsistent, an exception is
+		# raised. If it's totally absent, the existing packages
+		# are indexed so that the incremental update that follows
+		# at the end of the function will work correctly.
+ self.image.update_index_dir()
+ self.ind = indexer.Indexer(self.image.index_dir,
+ CLIENT_DEFAULT_MEM_USE_KB, progtrack=self.progtrack)
+ self.ind.check_index(self.image.get_fmri_manifest_pairs(),
+ force_rebuild=False)
+
npkgs = 0
nfiles = 0
nbytes = 0
@@ -423,7 +435,7 @@
# than removal and re-install, since these two have separate
		# semantics.
#
- # General install method is removals, updates and then
+ # General install method is removals, updates and then
# installs. User and group installs are moved to be ahead of
# updates so that a package that adds a new user can specify
# that owner for existing files.
@@ -461,11 +473,11 @@
for src, dest in p.gen_install_actions()
]
- # move any user/group actions into modify list to
+ # move any user/group actions into modify list to
# permit package to add user/group and change existing
# files to that user/group in a single update
		# iterate over copy since we're modifying install_actions
-
+
for a in install_actions[:]:
if a[2].name == "user" or a[2].name == "group":
update_actions.append(a)
@@ -480,12 +492,13 @@
self.progtrack.actions_add_progress()
self.progtrack.actions_done()
-
+
# generate list of install actions, sort and execute
install_actions.sort(key = lambda obj:obj[2])
- self.progtrack.actions_set_goal("Install Phase", len(install_actions))
+ self.progtrack.actions_set_goal("Install Phase",
+ len(install_actions))
for p, src, dest in install_actions:
p.execute_install(src, dest)
@@ -496,6 +509,36 @@
for p in self.pkg_plans:
p.postexecute()
-
+
self.state = EXECUTED_OK
+
+ del actions
+ del update_actions
+ del install_actions
+ del self.target_rem_fmris
+ del self.target_fmris
+ del self.directories
+
+ # Perform the incremental update to the search indexes
+ # for all changed packages
+ plan_info = []
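+		# Each entry is (destination fmri, destination manifest
+		# path, origin fmri, origin manifest path); the
+		# destination entries are None for a package being
+		# removed, and the origin entries are None for a newly
+		# installed package.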
+ for p in self.pkg_plans:
+ d_fmri = p.destination_fmri
+ d_manifest_path = None
+ if d_fmri:
+ d_manifest_path = \
+ self.image.get_manifest_path(d_fmri)
+ o_fmri = p.origin_fmri
+ o_manifest_path = None
+ if o_fmri:
+ o_manifest_path = \
+ self.image.get_manifest_path(o_fmri)
+ plan_info.append((d_fmri, d_manifest_path, o_fmri,
+ o_manifest_path))
+ del self.pkg_plans
+ self.progtrack.actions_set_goal("Index Phase", len(plan_info))
+ self.ind.client_update_index((self.filters, plan_info))
+
--- a/src/modules/client/progress.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/client/progress.py Fri Jul 25 13:56:38 2008 -0700
@@ -31,6 +31,8 @@
import time
from pkg.misc import msg, PipeError
+IND_DELAY = 0.05
+
class ProgressTracker(object):
""" This abstract class is used by the client to render and track
progress towards the completion of various tasks, such as
@@ -68,6 +70,11 @@
self.act_phase = "None"
self.act_phase_last = "None"
+ self.ind_cur_nitems = 0
+ self.ind_goal_nitems = 0
+ self.ind_phase = "None"
+ self.ind_phase_last = "None"
+
self.debug = False
self.last_printed = 0 # when did we last emit status?
@@ -156,8 +163,20 @@
self.act_output_done()
assert self.act_goal_nactions == self.act_cur_nactions
- def actions_get_progress(self):
- msg("not yet")
+ def index_set_goal(self, phase, nitems):
+ self.ind_phase = phase
+ self.ind_goal_nitems = nitems
+ self.ind_cur_nitems = 0
+
+ def index_add_progress(self):
+ self.ind_cur_nitems += 1
+ if self.ind_goal_nitems > 0:
+ self.ind_output()
+
+ def index_done(self):
+ if self.ind_goal_nitems > 0:
+ self.ind_output_done()
+ assert self.ind_goal_nitems == self.ind_cur_nitems
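+
+	# Typical use, as in indexer.py: call index_set_goal(phase, n)
+	# once, then index_add_progress() once per item processed, and
+	# finally index_done() when the phase completes.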
#
# This set of methods should be regarded as abstract *and* protected.
@@ -197,6 +216,12 @@
def act_output_done(self):
raise NotImplementedError("act_output_done() not implemented in superclass")
+ def ind_output(self):
+ raise NotImplementedError("ind_output() not implemented in superclass")
+
+ def ind_output_done(self):
+ raise NotImplementedError("ind_output_done() not implemented in superclass")
+
class ProgressTrackerException(Exception):
@@ -239,6 +264,10 @@
def act_output_done(self): return
+ def ind_output(self): return
+
+ def ind_output_done(self): return
+
class NullProgressTracker(QuietProgressTracker):
""" This ProgressTracker is a subclass of QuietProgressTracker
@@ -317,6 +346,26 @@
raise PipeError, e
raise
+ def ind_output(self):
+ if self.ind_phase != self.ind_phase_last:
+ try:
+ print "%s ... " % self.ind_phase,
+ except IOError, e:
+ if e.errno == errno.EPIPE:
+ raise PipeError, e
+ raise
+ self.ind_phase_last = self.ind_phase
+ return
+
+ def ind_output_done(self):
+ try:
+ print "Done"
+ sys.stdout.flush()
+ except IOError, e:
+ if e.errno == errno.EPIPE:
+ raise PipeError, e
+ raise
+
class FancyUNIXProgressTracker(ProgressTracker):
@@ -330,6 +379,7 @@
ProgressTracker.__init__(self)
self.act_started = False
+ self.ind_started = False
self.last_print_time = 0
try:
@@ -422,7 +472,7 @@
if self.spinner >= len(self.spinner_chars):
self.spinner = 0
print "%-50s..... %c%c" % \
- (self.ver_cur_fmri.get_pkg_stem(),
+ (self.ver_cur_fmri.get_pkg_stem(),
self.spinner_chars[self.spinner],
self.spinner_chars[self.spinner]),
print self.cr,
@@ -519,3 +569,39 @@
raise PipeError, e
raise
+ def ind_output(self, force=False):
+ if force or (time.time() - self.last_print_time) >= IND_DELAY:
+ self.last_print_time = time.time()
+ else:
+ return
+
+ try:
+ # The first time, emit header.
+ if not self.ind_started:
+ self.ind_started = True
+ print "%-40s %11s" % ("PHASE", "ITEMS")
+ else:
+ print self.cr,
+
+ print "%-40s %11s" % \
+ (
+ self.ind_phase,
+ "%d/%d" % (self.ind_cur_nitems,
+ self.ind_goal_nitems)
+ ),
+
+ sys.stdout.flush()
+ except IOError, e:
+ if e.errno == errno.EPIPE:
+ raise PipeError, e
+ raise
+
+ def ind_output_done(self):
+ self.ind_output(force=True)
+ try:
+ print
+ sys.stdout.flush()
+ except IOError, e:
+ if e.errno == errno.EPIPE:
+ raise PipeError, e
+ raise
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/client/query_engine.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,56 @@
+#!/usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+import pkg.search_storage as ss
+import pkg.search_errors as search_errors
+import pkg.query_engine as qe
+
+class Query(qe.Query):
+ """ The class which handles all query parsing and representation. """
+ # The empty class is present to allow consumers to import a single
+ # query engine module rather than have to import the client/server
+ # one as well as the base one.
+ pass
+
+class ClientQueryEngine(qe.QueryEngine):
+ """This class contains the data structures and methods needed to
+ perform search on the indexes created by Indexer."""
+
+ def search(self, query):
+ """Searches the indexes for any matches of query
+ and returns the results."""
+
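+		# Open all index files at a consistent version, read the
+		# token-offset table, run the query, then load only the
+		# lines of the remaining dictionaries that the matched
+		# ids require.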
+ self._open_dicts()
+
+ try:
+ self._data_token_offset.read_dict_file()
+ matched_ids, res = self.search_internal(query)
+ for n, d in self._data_dict.iteritems():
+ if d == self._data_main_dict or \
+ d == self._data_token_offset:
+ continue
+ d.matching_read_dict_file(matched_ids[n])
+ finally:
+ self._close_dicts()
+ return self.get_results(res)
--- a/src/modules/fmri.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/fmri.py Fri Jul 25 13:56:38 2008 -0700
@@ -167,14 +167,19 @@
def get_version(self):
return self.version.get_short_version()
- def get_pkg_stem(self, default_authority = None, anarchy = False):
+ def get_pkg_stem(self, default_authority = None, anarchy = False,
+ include_pkg = True):
"""Return a string representation of the FMRI without a specific
version. Anarchy returns a stem without any authority."""
+ pkg_str = ""
if not self.authority or \
self.authority.startswith(PREF_AUTH_PFX) or anarchy:
- return "pkg:/%s" % self.pkg_name
-
- return "pkg://%s/%s" % (self.authority, self.pkg_name)
+ if include_pkg:
+ pkg_str = "pkg:/"
+ return "%s%s" % (pkg_str, self.pkg_name)
+ if include_pkg:
+ pkg_str = "pkg://"
+ return "%s%s/%s" % (pkg_str, self.authority, self.pkg_name)
def get_short_fmri(self, default_authority = None):
"""Return a string representation of the FMRI without a specific
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/indexer.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,807 @@
+#!/usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+
+# Indexer is a class designed to index a set of manifests or pkg plans
+# and provide a compact representation on disk which is quickly searchable.
+#
+# The file format it uses consists of eight dictionaries. Each of these
+# dictionaries lists a version number in its first line. This version number
+# is what allows the files to be opened consistently even while a re-index is
+# happening. Five of these dictionaries (id_to_fmri_dict, id_to_action_dict,
+# id_to_token_type_dict, id_to_keyval_dict and id_to_version_dict) are stored
+# as an unsorted list. The line number of each entry corresponds to the id
+# number the main dictionary uses for that entry. The full fmri list is a
+# list of all packages which the current index has indexed. This is used for
+# checking whether a package needs to be reindexed on a catalog rebuild.
+#
+# Here is an example of a line from the main dictionary; it is explained
+# below:
+# %gconf.xml (5,3,65689 => 249,202) (5,3,65690 => 249,202)
+# (5,3,65691 => 249,202) (5,3,65692 => 249,202)
+#
+# The main dictionary has a more complicated format. Each line begins with a
+# search token (%gconf.xml) followed by a list of mappings. Each mapping maps
+# a (token_type, action, keyval) id tuple ((5,3,65689), (5,3,65690),
+# (5,3,65691), (5,3,65692)) to a list of (pkg_stem, version) id pairs
+# (249,202) identifying the packages in which the token is found in an action
+# whose token_type, action, and keyvalues match the tuple. Further compaction
+# is gained by storing everything except the token as an id which the other
+# dictionaries can turn into human-readable content.
+#
+# In short, the definition of a main dictionary entry is:
+# Note: "(", ")", and "=>" actually appear in the file
+# "[", "]", and "+" are used to specify pattern
+# token [(token_type_id, action_id, keyval_id => [pkg_stem_id,version_id ]+)]+
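+#
+# Reading the example line above: the token "%gconf.xml" was found in the
+# package whose (pkg_stem_id, version_id) pair is (249,202), under four
+# distinct (token_type_id, action_id, keyval_id) triples; each id is a line
+# number in the corresponding id_to_*_dict.ascii file. (The ids shown are
+# illustrative only.)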
+#
+# To use this class, construct one, passing the directory to use for index
+# storage and a maximum RAM allowance to the constructor. For example:
+# ind = Indexer('/usr/foo/path/to/image/or/repo/index',
+#     CLIENT_DEFAULT_MEM_USE_KB)
+# Externally, create either a list of (fmri, manifest path) pairs or build a
+# pkg plan. These should describe the packages added to, changed in, or
+# removed from the system.
+#
+# The client code should use
+# client_update_index(pkgplanList, tmp_index_dir = ?)
+# where tmp_index_dir allows a different directory than the default to be
+# used for storing the index while it's being built. The default is to
+# create a subdirectory TMP of the index directory and store the output
+# in there temporarily.
+#
+# The server code should use
+# server_update_index(fmri_manifest_list, tmp_index_dir = ?)
+#
+# The assumption is that the client will always be passing pkg plans
+# (with one exception) while the server will always be passing
+# fmri's paired with the paths to their manifests. The one exception
+# to the client side assumption is when the index is being rebuilt.
+# In that case, the client calls check_index. check_index only
+# rebuilds the index if the index is empty or if it is forced
+# to by an argument.
+#
+# If the storage structure is changed substantially, it will be necessary
+# to change _calc_ram_use to reflect the new structures. The figures used
+# were generated during a server index of a repository by taking a sampling
+# of the internal structures after every manifest read and observing the memory
+# usage reported by pmap. This provided a correlation of .9966 between the
+# predicted and observed memory used during that run. When applied to a client
+# side index it provided a correlation of .9964 between the predicted and
+# observed memory used.
+
+import os
+import urllib
+import shutil
+import errno
+
+import pkg.version
+
+import pkg.manifest as manifest
+import pkg.search_storage as ss
+import pkg.search_errors as search_errors
+
+# Constants for indicating whether pkgplans or fmri-manifest path pairs are
+# used as arguments.
+IDX_INPUT_TYPE_PKG = 0
+IDX_INPUT_TYPE_FMRI = 1
+
+INITIAL_VERSION_NUMBER = 1
+
+class Indexer(object):
+ """ See block comment at top for documentation """
+ file_version_string = "VERSION: "
+
+ def __init__(self, index_dir, default_max_ram_use, progtrack=None):
+ self._num_keys = 0
+ self._num_manifests = 0
+ self._num_entries = 0
+ self._max_ram_use = float(os.environ.get("PKG_INDEX_MAX_RAM",
+ default_max_ram_use)) * 1024
+
+ # This structure was used to gather all index files into one
+ # location. If a new index structure is needed, the files can
+ # be added (or removed) from here. Providing a list or
+ # dictionary allows an easy approach to opening or closing all
+ # index files.
+
+ self._data_dict = {
+ 'fmri': ss.IndexStoreListDict('id_to_fmri_dict.ascii'),
+ 'action':
+ ss.IndexStoreListDict('id_to_action_dict.ascii'),
+ 'tok_type':
+ ss.IndexStoreListDict(
+ 'id_to_token_type_dict.ascii'),
+ 'version':
+ ss.IndexStoreListDict('id_to_version_dict.ascii',
+ Indexer._build_version),
+ 'keyval':
+ ss.IndexStoreListDict('id_to_keyval_dict.ascii'),
+ 'full_fmri': ss.IndexStoreSet('full_fmri_list'),
+ 'main_dict': ss.IndexStoreMainDict('main_dict.ascii'),
+ 'token_byte_offset':
+ ss.IndexStoreDictMutable('token_byte_offset')
+ }
+
+ self._data_fmri = self._data_dict['fmri']
+ self._data_action = self._data_dict['action']
+ self._data_tok_type = self._data_dict['tok_type']
+ self._data_version = self._data_dict['version']
+ self._data_keyval = self._data_dict['keyval']
+ self._data_full_fmri = self._data_dict['full_fmri']
+ self._data_main_dict = self._data_dict['main_dict']
+ self._data_token_offset = self._data_dict['token_byte_offset']
+
+ self._index_dir = index_dir
+ self._tmp_dir = os.path.join(self._index_dir, "TMP")
+
+ self._indexed_manifests = 0
+ self.server_repo = True
+ self.empty_index = False
+ self.file_version_number = None
+
+ self._progtrack = progtrack
+
+ @staticmethod
+ def _build_version(vers):
+ """ Private method for building versions from a string. """
+ return pkg.version.Version(urllib.unquote(vers), None)
+
+ def _read_input_indexes(self, directory):
+ """ Opens all index files using consistent_open and reads all
+ of them into memory except the main dictionary file to avoid
+ inefficient memory usage.
+
+ """
+ res = ss.consistent_open(self._data_dict.values(), directory)
+ if self._progtrack is not None:
+ self._progtrack.index_set_goal(
+ "Reading Existing Index", len(self._data_dict))
+		if res is None:
+ self.file_version_number = INITIAL_VERSION_NUMBER
+ self.empty_index = True
+ return None
+ self.file_version_number = res
+
+ try:
+ try:
+ for d in self._data_dict.values():
+ if (d == self._data_main_dict or
+ d == self._data_token_offset):
+ if self._progtrack is not None:
+ self._progtrack.index_add_progress()
+ continue
+ d.read_dict_file()
+ if self._progtrack is not None:
+ self._progtrack.index_add_progress()
+ except:
+ self._data_dict['main_dict'].close_file_handle()
+ raise
+ finally:
+ for d in self._data_dict.values():
+ if d == self._data_main_dict:
+ continue
+ d.close_file_handle()
+ if self._progtrack is not None:
+ self._progtrack.index_done()
+
+ def _add_terms(self, added_fmri, new_dict, added_dict):
+ """ Adds the terms in new_dict to added_dict as pointers to
+ added_fmri. Returns the number of entries added.
+ """
+
+ # Originally the structure of added_dict was
+ # dict -> dict -> set. This arrangement wasted an enormous
+ # amount of space on the overhead of the second level
+ # dictionaries and third level sets. That structure was
+ # replaced by dict -> list of
+ # (key, list of (fmri, version) tuples) tuples.
+ #
+ # Because the second and third levels are small,
+ # especially compared to the top level, doing a linear search
+ # through the second level list is worth the savings of not
+ # using dictionaries at the second level.
+ #
+ # The use of a set at the third level was to prevent
+ # duplicate entries of a fmri-version tuple; however, this
+ # should almost never happen as manifests cannot
+ # contain duplicate actions. The only way for duplicate
+ # entries to occur is for a token to be repeated
+ # within an action. For example a description of "the package
+ # installs foo in the bar directory and installs baz in the
+ # random directory" would have duplicate entries for "the"
+ # and "installs." This problem is resolved by the conversion
+ # of the list into a set in write_main_dict_line prior to the
+ # list being written out.
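+ #
+ # As an illustrative sketch (hypothetical ids, not from a real
+ # run), after indexing a package whose manifest yields the
+ # token "vim" once, added_dict would look something like:
+ # { "vim": [((5, 3, 65689), [(249, 202)])] }
+ # where (5, 3, 65689) is a (tok_type_id, action_id, keyval_id)
+ # triple and (249, 202) is a (fmri_id, version_id) pair.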
+
+ added_terms = 0
+ version = added_fmri.version
+ pkg_stem = added_fmri.get_pkg_stem(anarchy=True)
+ fmri_id = self._data_fmri.get_id_and_add(pkg_stem)
+ version_id = self._data_version.get_id_and_add(version)
+ for tok_type in new_dict.keys():
+ tok_type_id = \
+ self._data_tok_type.get_id_and_add(tok_type)
+ for tok in new_dict[tok_type]:
+ if tok not in added_dict:
+ added_dict[tok] = []
+ ak_list = new_dict[tok_type][tok]
+ for action, keyval in ak_list:
+ action_id = self._data_action.get_id_and_add(action)
+ keyval_id = self._data_keyval.get_id_and_add(keyval)
+ s = (tok_type_id, action_id, keyval_id)
+ found = False
+ tup = fmri_id, version_id
+ for (list_s, list_set) in \
+ added_dict[tok]:
+ if list_s == s:
+ list_set.append(tup)
+ found = True
+ break
+ if not found:
+ tmp_set = []
+ tmp_set.append(tup)
+ added_dict[tok].append(
+ (s, tmp_set))
+ added_terms += 1
+ return added_terms
+
+ @staticmethod
+ def _calc_ram_use(dict_size, ids, total_terms):
+ """ Estimates RAM used based on size of added and
+ removed dictionaries. It returns an estimated size in KB.
+ """
+ # As noted above, these numbers were estimated through
+ # experimentation. Do not change them unless the
+ # data structure has changed. If a change is needed,
+ # re-estimate the coefficients experimentally.
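+ # As a worked example (hypothetical inputs): with
+ # dict_size=100000, ids=0, and total_terms=200000, the
+ # estimate is 0.5892 * 100000 - 0.009595 * 200000 + 23512,
+ # or roughly 80513 KB.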
+ return 0.5892 * dict_size - 0.12295 * ids - \
+ 0.009595 * total_terms + 23512
+
+ def _process_pkgplan_list(self, pkgplan_info, start_point):
+ """ Takes a list of pkg plans and updates the internal storage
+ to reflect the changes to the installed packages that those
+ plans describe.
+ """
+ (d_filters, pkgplan_list) = pkgplan_info
+
+ added_dict = {}
+ removed_packages = set()
+
+ remove_action_ids = set()
+ remove_keyval_ids = set()
+ remove_fmri_ids = set()
+ remove_version_ids = set()
+ remove_tok_type_ids = set()
+
+ total_terms = 0
+ stopping_early = False
+
+ if self._progtrack is not None and start_point == 0:
+ self._progtrack.index_set_goal("Indexing Packages",
+ len(pkgplan_list))
+
+ while start_point < len(pkgplan_list) and not stopping_early:
+ (d_fmri, d_manifest_path, o_fmri,
+ o_manifest_path) = \
+ pkgplan_list[start_point]
+ dest_fmri = d_fmri
+ origin_fmri = o_fmri
+
+ start_point += 1
+
+ # The pkg plan for a newly added package has an origin
+ # fmri of None. In that case, there's nothing
+ # to remove.
+ if origin_fmri is not None:
+ self._data_full_fmri.remove_entity(
+ origin_fmri.get_fmri(anarchy=True))
+ mfst = manifest.Manifest()
+ mfst_file = file(o_manifest_path)
+ mfst.set_content(mfst_file.read())
+ origin_dict = mfst.search_dict()
+ version = origin_fmri.version
+ pkg_stem = \
+ origin_fmri.get_pkg_stem(anarchy=True)
+ fmri_id = self._data_fmri.get_id(pkg_stem)
+ version_id = self._data_version.get_id(version)
+ remove_fmri_ids.add(fmri_id)
+ remove_version_ids.add(version_id)
+ for tok_type in origin_dict.keys():
+ tok_type_id = \
+ self._data_tok_type.get_id(tok_type)
+ remove_tok_type_ids.add(tok_type_id)
+ for tok in origin_dict[tok_type]:
+ ak_list = \
+ origin_dict[tok_type][tok]
+ for action, keyval in ak_list:
+ action_id = \
+ self._data_action.get_id(action)
+ keyval_id = \
+ self._data_keyval.get_id(keyval)
+ remove_action_ids.add(action_id)
+ remove_keyval_ids.add(keyval_id)
+ removed_packages.add( \
+ (fmri_id,
+ version_id))
+
+ # The pkg plan when a package is uninstalled has a
+ # dest_fmri of None, in which case there's nothing
+ # to add.
+ if dest_fmri is not None:
+ self._data_full_fmri.add_entity(
+ dest_fmri.get_fmri(anarchy=True))
+ mfst = manifest.Manifest()
+ mfst_file = file(d_manifest_path)
+ mfst.set_content(mfst_file.read())
+ mfst.filter(d_filters)
+ dest_dict = mfst.search_dict()
+ total_terms += self._add_terms(dest_fmri,
+ dest_dict, added_dict)
+
+ t_cnt = 0
+ for d in self._data_dict.values():
+ t_cnt += d.count_entries_removed_during_partial_indexing()
+
+ est_ram_use = self._calc_ram_use(len(added_dict), t_cnt,
+ (total_terms + len(removed_packages)))
+
+ if self._progtrack is not None:
+ self._progtrack.index_add_progress()
+
+ if est_ram_use >= self._max_ram_use:
+ stopping_early = True
+ break
+
+ return (stopping_early, start_point, (added_dict,
+ removed_packages, remove_action_ids, remove_fmri_ids,
+ remove_keyval_ids, remove_tok_type_ids, remove_version_ids))
+
+ def _process_fmri_manifest_list(self, fmri_manifest_list, start_point):
+ """ Takes a list of fmri, manifest pairs and updates the
+ internal storage to reflect the new packages.
+ """
+ added_dict = {}
+ removed_packages = set()
+
+ remove_action_ids = set()
+ remove_keyval_ids = set()
+ remove_fmri_ids = set()
+ remove_version_ids = set()
+ remove_tok_type_ids = set()
+
+ stopping_early = False
+ total_terms = 0
+
+ if self._progtrack is not None and start_point == 0:
+ self._progtrack.index_set_goal("Indexing Packages",
+ len(fmri_manifest_list))
+
+ while start_point < len(fmri_manifest_list) and \
+ not stopping_early:
+ added_fmri, manifest_path = \
+ fmri_manifest_list[start_point]
+ start_point += 1
+ self._data_full_fmri.add_entity(added_fmri.get_fmri())
+ mfst = manifest.Manifest()
+ mfst_file = file(manifest_path)
+ mfst.set_content(mfst_file.read())
+ new_dict = mfst.search_dict()
+ total_terms += self._add_terms(added_fmri, new_dict,
+ added_dict)
+
+ t_cnt = 0
+ for d in self._data_dict.values():
+ t_cnt += \
+ d.count_entries_removed_during_partial_indexing()
+
+ est_ram_use = self._calc_ram_use(len(added_dict), t_cnt,
+ (total_terms + len(removed_packages)))
+
+ if self._progtrack is not None:
+ self._progtrack.index_add_progress()
+
+ if est_ram_use >= self._max_ram_use:
+ stopping_early = True
+ break
+
+ return (stopping_early, start_point, (added_dict,
+ removed_packages, remove_action_ids, remove_fmri_ids,
+ remove_keyval_ids, remove_tok_type_ids, remove_version_ids))
+
+ def _write_main_dict_line(self, file_handle, token, k_k_list_list,
+ remove_action_ids, remove_keyval_ids, remove_tok_type_ids,
+ remove_fmri_ids, remove_version_ids):
+ """ Writes out the new main dictionary file and also adds the
+ token offsets to _data_token_offset.
+ """
+
+ cur_location = file_handle.tell()
+ self._data_token_offset.write_entity(token, cur_location)
+
+ tmp = {}
+
+ for (k, k_list) in k_k_list_list:
+ tok_type_id, action_id, keyval_id = k
+ remove_action_ids.discard(action_id)
+ remove_keyval_ids.discard(keyval_id)
+ remove_tok_type_ids.discard(tok_type_id)
+ # This conversion to a set is necessary to prevent
+ # duplicate entries. See the block comment in
+ # add_terms for more details.
+ tmp[k] = set(k_list)
+ for pkg_id, version_id in k_list:
+ remove_fmri_ids.discard(pkg_id)
+ remove_version_ids.discard(version_id)
+ self._data_main_dict.write_main_dict_line(file_handle,
+ token, tmp)
+
+
+ def _update_index(self, dicts, out_dir):
+ """ Processes the main dictionary file and writes out a new
+ main dictionary file reflecting the changes in the packages.
+ """
+ (added_dict, removed_packages, remove_action_ids,
+ remove_fmri_ids, remove_keyval_ids, remove_tok_type_ids,
+ remove_version_ids) = dicts
+
+ if self.empty_index:
+ file_handle = []
+ else:
+ file_handle = self._data_main_dict.get_file_handle()
+ assert file_handle
+
+ if self.file_version_number == None:
+ self.file_version_number = INITIAL_VERSION_NUMBER
+ else:
+ self.file_version_number += 1
+
+ self._data_main_dict.write_dict_file(
+ out_dir, self.file_version_number)
+ # The dictionary file's opened in append mode to avoid removing
+ # the version information the search storage class added.
+ out_main_dict_handle = \
+ open(os.path.join(out_dir,
+ self._data_main_dict.get_file_name()), 'ab')
+
+ self._data_token_offset.open_out_file(out_dir,
+ self.file_version_number)
+
+ added_toks = added_dict.keys()
+ added_toks.sort()
+ added_toks.reverse()
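+ # added_toks is reversed so that pop() yields tokens in
+ # ascending alphabetical order; e.g. (sketch) ["awk", "bash",
+ # "zsh"] becomes ["zsh", "bash", "awk"], and pop() returns
+ # "awk" first.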
+
+ try:
+ for line in file_handle:
+ (tok, entries) = \
+ self._data_main_dict.parse_main_dict_line(
+ line)
+ new_entries = []
+ for (tok_type_id, action_id, keyval_id,
+ fmri_ids) in entries:
+ k = (tok_type_id, action_id, keyval_id)
+ fmri_list = []
+ for fmri_version in fmri_ids:
+ if not fmri_version in \
+ removed_packages:
+ fmri_list.append(
+ fmri_version)
+ if fmri_list:
+ new_entries.append(
+ (k, fmri_list))
+ # Add tokens newly discovered in the added
+ # packages which are alphabetically earlier
+ # than the token most recently read from the
+ # existing main dictionary file.
+ while added_toks and (added_toks[-1] < tok):
+ new_tok = added_toks.pop()
+ assert ' ' not in new_tok
+ assert len(new_tok) > 0
+ self._write_main_dict_line(
+ out_main_dict_handle,
+ new_tok, added_dict[new_tok],
+ remove_action_ids,
+ remove_keyval_ids,
+ remove_tok_type_ids,
+ remove_fmri_ids, remove_version_ids)
+
+ # Combine the information about the current
+ # token from the new packages with the existing
+ # information for that token.
+ if added_dict.has_key(tok):
+ tmp = added_toks.pop()
+ assert tmp == tok
+ for (k, k_list) in added_dict[tok]:
+ found = False
+ for (j, j_list) in new_entries:
+ if j == k:
+ found = True
+ j_list.extend(
+ k_list)
+ break
+ if not found:
+ new_entries.append(
+ (k, k_list))
+ # If this token has any packages still
+ # associated with it, write them to the file.
+ if new_entries:
+ assert ' ' not in tok
+ assert len(tok) > 0
+ self._write_main_dict_line(
+ out_main_dict_handle,
+ tok, new_entries, remove_action_ids,
+ remove_keyval_ids,
+ remove_tok_type_ids,
+ remove_fmri_ids, remove_version_ids)
+ finally:
+ if not self.empty_index:
+ file_handle.close()
+ self._data_main_dict.close_file_handle()
+
+ # For any new tokens which are alphabetically after the last
+ # entry in the existing file, add them to the end of the file.
+ while added_toks:
+ new_tok = added_toks.pop()
+ assert ' ' not in new_tok
+ assert len(new_tok) > 0
+ self._write_main_dict_line(
+ out_main_dict_handle,
+ new_tok, added_dict[new_tok], remove_action_ids,
+ remove_keyval_ids, remove_tok_type_ids,
+ remove_fmri_ids, remove_version_ids)
+ out_main_dict_handle.close()
+ self._data_token_offset.close_file_handle()
+
+ # Things in remove_* are no longer found in the
+ # main dictionary and can be safely removed. This
+ # allows for reuse of space.
+ for tmp_id in remove_action_ids:
+ self._data_action.remove_id(tmp_id)
+ for tmp_id in remove_keyval_ids:
+ self._data_keyval.remove_id(tmp_id)
+ for tmp_id in remove_fmri_ids:
+ self._data_fmri.remove_id(tmp_id)
+ for tmp_id in remove_tok_type_ids:
+ self._data_tok_type.remove_id(tmp_id)
+ for tmp_id in remove_version_ids:
+ self._data_version.remove_id(tmp_id)
+
+ added_dict.clear()
+ removed_packages.clear()
+
+ def _write_assistant_dicts(self, out_dir):
+ """ Write out the companion dictionaries needed for
+ translating the internal representation of the main
+ dictionary into human readable information. """
+ for d in self._data_dict.values():
+ if d == self._data_main_dict or \
+ d == self._data_token_offset:
+ continue
+ d.write_dict_file(out_dir, self.file_version_number)
+
+ def _generic_update_index(self, input_list, input_type,
+ tmp_index_dir = None):
+ """ Performs all the steps needed to update the indexes."""
+
+ # Allow the use of a directory other than the default
+ # directory to store the intermediate results in.
+ if not tmp_index_dir:
+ tmp_index_dir = self._tmp_dir
+ assert not (tmp_index_dir == self._index_dir)
+
+ # Read the existing dictionaries.
+ self._read_input_indexes(self._index_dir)
+
+ # If the tmp_index_dir exists, it suggests a previous indexing
+ # attempt aborted or that another indexer is running. In either
+ # case, throw an exception.
+ try:
+ os.makedirs(tmp_index_dir)
+ except OSError, e:
+ if e.errno == errno.EEXIST:
+ raise \
+ search_errors.PartialIndexingException(
+ tmp_index_dir)
+ else:
+ raise
+
+ more_to_do = True
+ start_point = 0
+
+ while more_to_do:
+
+ assert start_point >= 0
+
+ if input_type == IDX_INPUT_TYPE_PKG:
+ (more_to_do, start_point, dicts) = \
+ self._process_pkgplan_list(input_list,
+ start_point)
+ elif input_type == IDX_INPUT_TYPE_FMRI:
+ (more_to_do, start_point, dicts) = \
+ self._process_fmri_manifest_list(
+ input_list, start_point)
+ else:
+ raise RuntimeError("Got unknown input_type: %s",
+ input_type)
+
+ # Update the main dictionary file
+ self._update_index(dicts, tmp_index_dir)
+
+ self.empty_index = False
+
+ if more_to_do:
+ self._data_main_dict.shift_file(tmp_index_dir,
+ ("_" + str(start_point)))
+
+ # Write out the helper dictionaries
+ self._write_assistant_dicts(tmp_index_dir)
+
+ # Move all files from the tmp directory into the index dir
+ # Note: consistent_open is needed because _migrate is not
+ # an atomic action.
+ self._migrate(source_dir = tmp_index_dir)
+
+ if self._progtrack is not None:
+ self._progtrack.index_done()
+
+ def client_update_index(self, pkgplan_list, tmp_index_dir = None):
+ """ This version of update index is designed to work with the
+ client side of things. Specifically, it expects a pkg plan
+ list with added and removed FMRIs/manifests. Note: if
+ tmp_index_dir is specified, it must NOT exist in the current
+ directory structure. This prevents the indexer from
+ accidentally removing files.
+ """
+ assert self._progtrack is not None
+ self._generic_update_index(pkgplan_list, IDX_INPUT_TYPE_PKG,
+ tmp_index_dir)
+
+ def server_update_index(self, fmri_manifest_list, tmp_index_dir = None):
+ """ This version of update index is designed to work with the
+ server side of things. Specifically, since we don't currently
+ support removal of a package from a repo, this function simply
+ takes a list of FMRIs to be added to the repo. Currently, the
+ only way to remove a package from the index is to remove it
+ from the depot and reindex. Note: if tmp_index_dir is
+ specified, it must NOT exist in the current directory structure.
+ This prevents the indexer from accidentally removing files.
+ """
+ self._generic_update_index(fmri_manifest_list,
+ IDX_INPUT_TYPE_FMRI, tmp_index_dir)
+
+ def check_index_existence(self):
+ """ Returns a boolean value indicating whether a consistent
+ index exists.
+
+ """
+ try:
+ try:
+ res = \
+ ss.consistent_open(self._data_dict.values(),
+ self._index_dir)
+ except Exception:
+ return False
+ finally:
+ for d in self._data_dict.values():
+ d.close_file_handle()
+ assert res != 0
+ return res
+
+ def check_index(self, fmris, force_rebuild, tmp_index_dir = None):
+ """ Rebuilds the indexes using the given fmris if it is
+ needed. It's needed if the index is empty or if force_rebuild
+ is true.
+ """
+ if not force_rebuild:
+ try:
+ res = \
+ ss.consistent_open(self._data_dict.values(),
+ self._index_dir)
+ finally:
+ for d in self._data_dict.values():
+ d.close_file_handle()
+ if res == None:
+ self.file_version_number = \
+ INITIAL_VERSION_NUMBER
+ self.empty_index = True
+ else:
+ return
+
+ try:
+ shutil.rmtree(self._index_dir)
+ os.makedirs(self._index_dir)
+ except OSError, e:
+ if e.errno == errno.EACCES:
+ raise search_errors.ProblematicPermissionsIndexException(
+ self._index_dir)
+ # Re-raise anything else rather than silently
+ # swallowing the error.
+ raise
+ self._generic_update_index(fmris, IDX_INPUT_TYPE_FMRI,
+ tmp_index_dir)
+ self.empty_index = False
+
+ def setup(self):
+ """ Seeds the index directory with empty stubs if the directory
+ is consistently empty. Does not overwrite existing indexes.
+ """
+ absent = False
+ present = False
+ for d in self._data_dict.values():
+ file_path = os.path.join(self._index_dir,
+ d.get_file_name())
+ if os.path.exists(file_path):
+ present = True
+ else:
+ absent = True
+ if absent and present:
+ raise \
+ search_errors.InconsistentIndexException( \
+ self._index_dir)
+ if present:
+ return
+ if self.file_version_number:
+ raise RuntimeError("Got file_version_number other"
+ "than None in setup.")
+ self.file_version_number = INITIAL_VERSION_NUMBER
+ for d in self._data_dict.values():
+ d.write_dict_file(self._index_dir,
+ self.file_version_number)
+
+ @staticmethod
+ def check_for_updates(index_root, fmri_set):
+ """ Checks fmri_set to see which members have not been indexed.
+ It modifies fmri_set.
+ """
+ data = ss.IndexStoreSet('full_fmri_list')
+ try:
+ data.open(index_root)
+ except IOError, e:
+ if not os.path.exists(os.path.join(
+ index_root, data.get_file_name())):
+ return fmri_set
+ else:
+ raise
+ try:
+ data.read_and_discard_matching_from_argument(fmri_set)
+ finally:
+ data.close_file_handle()
+
+ def _migrate(self, source_dir = None, dest_dir = None):
+ """ Moves the indexes from a temporary directory to the
+ permanent one.
+ """
+ if not source_dir:
+ source_dir = self._tmp_dir
+ if not dest_dir:
+ dest_dir = self._index_dir
+ assert not (source_dir == dest_dir)
+ logfile = os.path.join(dest_dir, "log")
+ lf = open(logfile, 'wb')
+ lf.write("moving " + source_dir + " to " + dest_dir + "\n")
+ lf.flush()
+
+ for d in self._data_dict.values():
+ shutil.move(os.path.join(source_dir, d.get_file_name()),
+ os.path.join(dest_dir, d.get_file_name()))
+
+ lf.write("finished moving\n")
+ lf.close()
+ os.remove(logfile)
+ shutil.rmtree(source_dir)
--- a/src/modules/manifest.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/manifest.py Fri Jul 25 13:56:38 2008 -0700
@@ -24,7 +24,6 @@
import os
import errno
-import cPickle
from itertools import groupby, chain
import pkg.actions as actions
@@ -45,7 +44,7 @@
DEPEND_REQUIRE = 0
DEPEND_OPTIONAL = 1
-DEPEND_INCORPORATE =10
+DEPEND_INCORPORATE = 10
depend_str = { DEPEND_REQUIRE : "require",
DEPEND_OPTIONAL : "optional",
@@ -109,11 +108,11 @@
r = ""
if self.fmri != None:
r += "set name=fmri value=%s\n" % self.fmri
-
+
for act in self.actions:
r += "%s\n" % act
return r
-
+
def difference(self, origin):
"""Return three lists of action pairs representing origin and
@@ -250,7 +249,7 @@
action.attrs["path"] = np
self.size += int(action.attrs.get("pkg.size", "0"))
- self.actions.append(action)
+ self.actions.append(action)
return
@@ -259,32 +258,46 @@
action_dict = {}
for a in self.actions:
for k, v in a.generate_indices().iteritems():
- # The value might be a list if an indexed action
- # attribute is multivalued, such as driver
- # aliases.
- t = (a.name, a.attrs.get(a.key_attr))
+ # Special handling of AttributeActions is
+ # needed in order to place the correct values
+ # into the correct output columns, because
+ # the pattern of which information changes
+ # on an item-by-item basis differs for
+ # AttributeActions.
+ #
+ # The right solution is probably to reorganize
+ # this function and all the generate_indices
+ # functions to allow the necessary flexibility.
+ if isinstance(a,
+ actions.attribute.AttributeAction):
+ tok_type = a.attrs.get(a.key_attr)
+ t = (a.name, k)
+ else:
+ tok_type = k
+ t = (a.name, a.attrs.get(a.key_attr))
+ # The value might be a list if an indexed
+ # action attribute is multivalued, such as
+ # driver aliases.
if isinstance(v, list):
- if k in action_dict:
- action_dict[k].update(
- dict((i, t) for i in v))
- else:
- action_dict[k] = \
- dict((i, t) for i in v)
+ if tok_type in action_dict:
+ action_dict[tok_type].update(
+ dict((i, [t]) for i in v))
+ else:
+ action_dict[tok_type] = \
+ dict((i, [t]) for i in v)
else:
- # XXX if there's more than one k,v pair
- # in the manifest, only one will get
- # recorded. basename,gmake is one
- # example.
- if k in action_dict:
- action_dict[k][v] = t
+ if tok_type not in action_dict:
+ action_dict[tok_type] = \
+ { v: [t] }
+ elif v not in action_dict[tok_type]:
+ action_dict[tok_type][v] = [t]
else:
- action_dict[k] = { v: t }
+ action_dict[tok_type][v].append(t)
+ assert action_dict[tok_type][v]
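+ # As an illustrative sketch (hypothetical manifest), a
+ # file action delivering usr/bin/vim would contribute
+ # an entry along the lines of:
+ # action_dict["basename"]["vim"] == [("file", "usr/bin/vim")]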
return action_dict
- def store(self, mfst_path, pkl_path):
- """ Store the manifest contents and the pickled index
- of the manifest (used for searching) to disk.
- """
+ def store(self, mfst_path):
+ """Store the manifest contents to disk."""
try:
mfile = file(mfst_path, "w")
@@ -303,19 +316,6 @@
#
mfile.write(self.tostr_unsorted())
mfile.close()
-
- try:
- pfile = open(pkl_path, "wb")
- except IOError, e:
- return
-
- try:
- cPickle.dump(self.search_dict(), pfile,
- protocol = cPickle.HIGHEST_PROTOCOL)
- pfile.close()
- except (IOError, e):
- os.unlink(pkl_path)
- pass
def get(self, key, default):
try:
--- a/src/modules/misc.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/misc.py Fri Jul 25 13:56:38 2008 -0700
@@ -234,7 +234,7 @@
self.args = args
def msg(*text):
- """ Emit a message. """
+ """ Emit a message. """
try:
print ' '.join([str(l) for l in text])
@@ -244,7 +244,7 @@
raise
def emsg(*text):
- """ Emit a message to sys.stderr. """
+ """ Emit a message to sys.stderr. """
try:
print >> sys.stderr, ' '.join([str(l) for l in text])
@@ -321,3 +321,16 @@
class TransferTimedOutException(Exception):
def __init__(self, args = None):
self.args = args
+
+
+# Default maximum memory usage during indexing.
+# This is a soft cap since memory usage is estimated.
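+#
+# As a sketch of the arithmetic (hypothetical machine): with 4 KB pages
+# and 1048576 physical pages (4 GB of RAM), the server default works out
+# to (1048576 / 1024.0) * 4096 / 3, or roughly 1398101 KB (about 1.3 GB),
+# and the client default is half of that.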
+try:
+ phys_pages = os.sysconf("SC_PHYS_PAGES")
+ page_size = os.sysconf("SC_PAGE_SIZE")
+ SERVER_DEFAULT_MEM_USE_KB = (phys_pages / 1024.0) * page_size / 3
+ CLIENT_DEFAULT_MEM_USE_KB = SERVER_DEFAULT_MEM_USE_KB / 2.0
+
+except:
+ CLIENT_DEFAULT_MEM_USE_KB = 100
+ SERVER_DEFAULT_MEM_USE_KB = 500
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/query_engine.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,188 @@
+#!/usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+import fnmatch
+
+import pkg.search_storage as ss
+import pkg.search_errors as search_errors
+
+FILE_OPEN_TIMEOUT_SECS = 5
+
+class Query(object):
+ """The class which handles all query parsing and representation. """
+
+ def __init__(self, term):
+ term = term.strip()
+ self._glob = False
+ if '*' in term or '?' in term or '[' in term:
+ self._glob = True
+ self._term = term
+
+ def get_term(self):
+ return self._term
+
+ def uses_glob(self):
+ return self._glob
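+
+# A small usage sketch (hypothetical terms): a term containing a
+# shell-style wildcard is treated as a glob pattern, a plain term is not:
+#
+# q = Query("*gconf*")
+# q.uses_glob() # True
+# q.get_term() # "*gconf*"
+# Query("vim").uses_glob() # False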
+
+class QueryEngine(object):
+ """This class contains the data structures and methods needed to
+ perform search on the indexes created by Indexer.
+ """
+ def __init__(self, dir_path):
+
+ assert dir_path
+
+ self._dir_path = dir_path
+
+ self._file_timeout_secs = FILE_OPEN_TIMEOUT_SECS
+
+ # This structure was used to gather all index files into one
+ # location. If a new index structure is needed, the files can
+ # be added (or removed) from here. Providing a list or
+ # dictionary allows an easy approach to opening or closing all
+ # index files.
+
+ self._data_dict = {
+ 'fmri': ss.IndexStoreDict('id_to_fmri_dict.ascii'),
+ 'action': ss.IndexStoreDict('id_to_action_dict.ascii'),
+ 'tok_type':
+ ss.IndexStoreDict('id_to_token_type_dict.ascii'),
+ 'version':
+ ss.IndexStoreDict('id_to_version_dict.ascii'),
+ 'keyval': ss.IndexStoreDict('id_to_keyval_dict.ascii'),
+ 'main_dict': ss.IndexStoreMainDict('main_dict.ascii'),
+ 'token_byte_offset':
+ ss.IndexStoreDictMutable('token_byte_offset')
+ }
+
+ self._data_fmri = self._data_dict['fmri']
+ self._data_action = self._data_dict['action']
+ self._data_tok_type = self._data_dict['tok_type']
+ self._data_version = self._data_dict['version']
+ self._data_keyval = self._data_dict['keyval']
+ self._data_main_dict = self._data_dict['main_dict']
+ self._data_token_offset = self._data_dict['token_byte_offset']
+
+ def _open_dicts(self, raise_on_no_index=True):
+ ret = ss.consistent_open(self._data_dict.values(),
+ self._dir_path, self._file_timeout_secs)
+ if ret == None and raise_on_no_index:
+ raise search_errors.NoIndexException(self._dir_path)
+ return ret
+
+ def _close_dicts(self):
+ for d in self._data_dict.values():
+ d.close_file_handle()
+
+ def search_internal(self, query):
+ """Searches the indexes in dir_path for any matches of query
+ and the results in self.res. The method assumes the dictionaries
+ have already been loaded and read appropriately.
+ """
+
+ assert self._data_main_dict.get_file_handle() is not None
+
+ matched_ids = {
+ 'fmri': set(),
+ 'action': set(),
+ 'tok_type': set(),
+ 'version': set(),
+ 'keyval': set(),
+ 'main_dict': set(),
+ 'token_byte_offset': set()
+ }
+
+ res = {}
+
+ glob = query.uses_glob()
+ term = query.get_term()
+
+ offsets = []
+
+ if glob:
+ keys = self._data_token_offset.get_dict().keys()
+ if not keys:
+ # No matches were found.
+ return matched_ids, res
+ matches = fnmatch.filter(keys, term)
+ offsets = [
+ self._data_token_offset.get_id(match)
+ for match in matches
+ ]
+ offsets.sort()
+ elif not self._data_token_offset.has_entity(term):
+ # No matches were found
+ return matched_ids, res
+ else:
+ offsets.append(
+ self._data_token_offset.get_id(term))
+
+ md_fh = self._data_main_dict.get_file_handle()
+ for o in offsets:
+ md_fh.seek(o)
+ line = md_fh.readline()
+ assert line != '\n'
+ tok, entries = self._data_main_dict.parse_main_dict_line(line)
+ assert ((term == tok) or
+ (glob and fnmatch.fnmatch(tok, term)))
+ for tok_type_id, action_id, keyval_id, \
+ fmri_ids in entries:
+ matched_ids['tok_type'].add(tok_type_id)
+ matched_ids['action'].add(action_id)
+ matched_ids['keyval'].add(keyval_id)
+
+ fmri_set = set()
+ for fmri_id, version_id in fmri_ids:
+ fmri_set.add((fmri_id,
+ version_id))
+ matched_ids['version'].add(
+ version_id)
+ matched_ids['fmri'].add(fmri_id)
+ fmri_list = list(fmri_set)
+ fmri_list.sort()
+ res[(tok_type_id,
+ action_id, keyval_id)] = fmri_list
+ return matched_ids, res
+
+ def get_results(self, res):
+ """Uses the data generated by calling search to generate
+ results of the search.
+ """
+
+ send_res = []
+
+ # Construct the answer for the search_0 format
+ for k in res.keys():
+ tok_type_id, action_id, keyval_id = k
+ tok_type = self._data_tok_type.get_entity(tok_type_id)
+ action = self._data_action.get_entity(action_id)
+ keyval = self._data_keyval.get_entity(keyval_id)
+ fmri_list = res[k]
+ for pkg_id, version_id in fmri_list:
+ fmri_res = \
+ self._data_fmri.get_entity(pkg_id) + \
+ "@" + \
+ self._data_version.get_entity(version_id)
+ send_res.append((tok_type, fmri_res,
+ action, keyval))
+ return send_res
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/search_errors.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,70 @@
+#!/usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+# __str__ methods defined for subclasses of IndexingException should be defined
+# for the server implementations. If the client needs different messages
+# displayed, catch the exception on the client side and display a custom
+# message.
+
+class IndexingException(Exception):
+ """ The base class for all exceptions that can occur while indexing. """
+ def __init__(self, cause):
+ self.cause = cause
+
+class InconsistentIndexException(IndexingException):
+ """ This is used when the existing index is found to have inconsistent
+ versions."""
+ def __str__(self):
+ return "Index corrupted, remove all files and " \
+ "rebuild from scratch by clearing out %s " \
+ " and restarting the depot." % self.cause
+
+class PartialIndexingException(IndexingException):
+ """ This is used when the directory the temporary files the indexer
+ should write to already exists. """
+ def __str__(self):
+ return "Result of partial indexing found, " \
+ "please correct that before indexing anew. Could " \
+ "not make: %s because it " \
+ "already exists. Removing this directory and " \
+ "using the --rebuild-index flag should fix this " \
+ "problem." % self.cause
+
+class ProblematicPermissionsIndexException(IndexingException):
+ """ This is used when the indexer is unable to create, move, or remove
+ files or directories it should be able to. """
+ def __str__(self):
+ return "Could not remove or create " \
+ "%s because of incorrect " \
+ "permissions. Please correct this issue then " \
+ "rebuild the index." % self.cause
+
+class NoIndexException(Exception):
+ """ This is used when a search is executed while no index exists. """
+ def __init__(self, index_dir):
+ self.index_dir = index_dir
+ def __str__(self):
+ return "Could not find index to search, looked in: %s" \
+ % self.index_dir
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/search_storage.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,614 @@
+#!/usr/bin/python
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+# need to add locks to the dictionary reading so that we don't have
+# multiple threads loading in the dictionary at the same time
+
+import os
+import errno
+import time
+import stat
+
+import pkg.fmri as fmri
+import pkg.search_errors as search_errors
+import pkg.portable as portable
+
+def consistent_open(data_list, directory, timeout = None):
+ """Opens all data holders in data_list and ensures that the
+ versions are consistent among all of them.
+ It retries several times in case a race condition between file
+ migration and open is encountered.
+ Note: Do not set timeout to be 0. It will cause an exception to be
+ immediately raised.
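+
+ A sketch of the expected use (hypothetical arguments):
+
+ version = consistent_open(stores, "/some/index/dir", timeout=5)
+ # 'version' is the shared version number of the index files,
+ # or None if no index files exist yet.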
+
+ """
+ missing = None
+ cur_version = None
+
+ start_time = time.time()
+
+ while cur_version == None and missing != True:
+ if timeout != None and ((time.time() - start_time) > timeout):
+ raise search_errors.InconsistentIndexException(
+ directory)
+ for d in data_list:
+ # All indexes must have the same version and all must
+ # either be present or absent for a successful return.
+ # If one of these conditions is not met, the function
+ # tries again until it succeeds or the time spent in
+ # the function is greater than timeout.
+ try:
+ f = os.path.join(directory, d.get_file_name())
+ fh = open(f, 'rb')
+ # If we get here, then the current index file
+ # is present.
+ if missing == None:
+ missing = False
+ elif missing:
+ for dl in data_list:
+ dl.close_file_handle()
+ missing = None
+ cur_version = None
+ continue
+ d.set_file_handle(fh, f)
+ version_tmp = fh.next()
+ version_num = \
+ int(version_tmp.split(' ')[1].rstrip('\n'))
+ # Read the version. If this is the first file,
+ # set the expected version; otherwise, check that
+ # the version matches the expected version.
+ if cur_version == None:
+ cur_version = version_num
+ elif not (cur_version == version_num):
+ cur_version = None
+ # Got inconsistent versions, so close
+ # all files and try again.
+ for d in data_list:
+ d.close_file_handle()
+ missing = None
+ cur_version = None
+ continue
+ except IOError, e:
+ if e.errno == errno.ENOENT:
+ # If the index file is missing, ensure
+ # that previous files were missing as
+ # well. If not, try again.
+ if missing == False:
+ for d in data_list:
+ d.close_file_handle()
+ missing = None
+ cur_version = None
+ continue
+ missing = True
+ else:
+ for d in data_list:
+ d.close_file_handle()
+ raise
+ if missing:
+ assert cur_version == None
+ # The index is missing (ie, no files were present).
+ return None
+ else:
+ assert cur_version is not None
+ return cur_version
+
+
+class IndexStoreBase(object):
+ """Base class for all data storage used by the indexer and
+ queryEngine. All members must have a file name and maintain
+ an internal file handle to that file as instructed by external
+ calls.
+ """
+
+ def __init__(self, file_name):
+ self._name = file_name
+ self._file_handle = None
+ self._file_path = None
+ self._size = None
+ self._mtime = None
+
+ def get_file_name(self):
+ return self._name
+
+ def set_file_handle(self, f_handle, f_path):
+ if self._file_handle:
+ raise RuntimeError("setting an extant file handle, "
+ "must close first, fp is: " + f_path)
+ else:
+ self._file_handle = f_handle
+ self._file_path = f_path
+
+ def get_file_path(self):
+ return self._file_path
+
+ def close_file_handle(self):
+ """Closes the file handle and clears it so that it cannot
+ be reused.
+ """
+ if self._file_handle:
+ self._file_handle.close()
+ self._file_handle = None
+ self._file_path = None
+
+ def _protected_write_dict_file(self, path, version_num, iterable):
+ """Writes the dictionary in the expected format.
+ Note: Only child classes should call this method.
+ """
+ version_string = "VERSION: "
+ file_handle = open(os.path.join(path, self._name), 'wb')
+ file_handle.write(version_string + str(version_num) + "\n")
+ for name in iterable:
+ file_handle.write(str(name) + "\n")
+ file_handle.close()
+
+ def should_reread(self):
+ """This method uses the modification time and the file size
+ to (heuristically) determine whether the file backing this
+ storage has changed since it was last read.
+ """
+ stat_info = os.stat(self._file_path)
+ if self._mtime != stat_info.st_mtime or \
+ self._size != stat_info.st_size:
+ self._mtime = stat_info[stat.ST_MTIME]
+ self._size = stat_info[stat.ST_SIZE]
+ return True
+ return False
+
+ def open(self, directory):
+ """This uses consistent open to ensure that the version line
+ processing is done consistently and that only a single function
+ actually opens files stored using this class.
+ """
+ return consistent_open([self], directory)
+
+
+class IndexStoreMainDict(IndexStoreBase):
+ """Class for representing the main dictionary file
+ """
+ # Here is an example of a line from the main dictionary, it is
+ # explained below:
+ # %gconf.xml (5,3,65689 => 249,202) (5,3,65690 => 249,202)
+ # (5,3,65691 => 249,202) (5,3,65692 => 249,202)
+ #
+ # The main dictionary has a more complicated format. Each line begins
+ # with a search token (%gconf.xml) followed by a list of mappings. Each
+ # mapping takes a token_type, action, and keyvalue tuple ((5,3,65689),
+ # (5,3,65690), (5,3,65691), (5,3,65692)) to a list of pkg-stem, version
+ # pairs (249,202) in which the token is found in an action with
+ # token_type, action, and keyvalues matching the tuple. Further
+ # compaction is gained by storing everything but the token as an id
+ # which the other dictionaries can turn into human-readable content.
+ #
+ # In short, the definition of a main dictionary entry is:
+ # Note: "(", ")", and "=>" actually appear in the file
+ # "[", "]", and "+" are used to specify pattern
+ # token [(token_type_id, action_id, keyval_id => [pkg_stem_id,version_id ]+)]+
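+ #
+ # For instance, parse_main_dict_line (below) applied to the example
+ # line above returns, as a sketch:
+ # ("%gconf.xml", [(5, 3, 65689, [(249, 202)]),
+ # (5, 3, 65690, [(249, 202)]),
+ # (5, 3, 65691, [(249, 202)]),
+ # (5, 3, 65692, [(249, 202)])])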
+
+ def __init__(self, file_name):
+ IndexStoreBase.__init__(self, file_name)
+ self._old_suffix = None
+
+ def write_dict_file(self, path, version_num):
+ """This class relies on external methods to write the file.
+ Making this empty call to protected_write_dict_file allows the
+ file to be set up correctly with the version number stored
+ correctly.
+ """
+ IndexStoreBase._protected_write_dict_file(self, path,
+ version_num, [])
+
+ def get_file_handle(self):
+ """Return the file handle. Note that doing
+ anything other than sequential reads or writes
+ to or from this file_handle may result in unexpected
+ behavior. In short, don't use seek.
+ """
+ return self._file_handle
+
+ @staticmethod
+ def parse_main_dict_line(line):
+ """Parses one line of a main dictionary file.
+ Changes to this function must be paired with changes to
+ write_main_dict_line below.
+ """
+ line = line.rstrip('\n')
+ tok_end = line.find(' ')
+ assert tok_end > 0
+ tok = line[:tok_end]
+ entries = line[tok_end + 2:].split('(')
+ res = []
+ for entry in entries:
+ tup, lst = entry.split('=>')
+ fmri_ids_text = lst.strip()
+ fmri_ids = fmri_ids_text.split(' ')
+ fmri_ids[-1] = fmri_ids[-1].strip(')')
+ tok_type_id, action_id, keyval_id = tup.split(',')
+ tok_type_id = int(tok_type_id)
+ action_id = int(action_id)
+ keyval_id = int(keyval_id)
+ processed_fmris = []
+ for label in fmri_ids:
+ fmri_id, version_id = label.split(',')
+ fmri_id = int(fmri_id)
+ version_id = int(version_id)
+ processed_fmris.append((fmri_id, version_id))
+ res.append((tok_type_id, action_id, keyval_id,
+ processed_fmris))
+ return (tok, res)
+
+ @staticmethod
+ def write_main_dict_line(file_handle, token, dictionary):
+ """Paired with parse_main_dict_line above. Writes
+ a line in a main dictionary file in the appropriate format.
+ """
+ file_handle.write(token)
+ for k in dictionary.keys():
+ tok_type_id, action_id, keyval_id = k
+ file_handle.write(" (" + str(tok_type_id) +
+ "," + str(action_id) + "," +
+ str(keyval_id) + " =>")
+ tmp_list = list(dictionary[k])
+ tmp_list.sort()
+ for pkg_id, version_id in tmp_list:
+ file_handle.write(" " + str(pkg_id) + "," +
+ str(version_id))
+ file_handle.write(")")
+ file_handle.write("\n")
+
+ def count_entries_removed_during_partial_indexing(self):
+ """Returns the number of entries removed during a second phase
+ of indexing.
+ """
+ # This returns 0 because this class is not responsible for
+ # storing anything in memory.
+ return 0
+
+ def shift_file(self, use_dir, suffix):
+ """Moves the existing file with self._name in directory
+ use_dir to a new file named self._name + suffix in directory
+ use_dir. If it has done this previously, it removes the old
+ file it moved. It also opens the newly moved file and uses
+ that as the file for its file handle.
+ """
+ assert self._file_handle is None
+ orig_path = os.path.join(use_dir, self._name)
+ new_path = os.path.join(use_dir, self._name + suffix)
+ portable.rename(orig_path, new_path)
+ tmp_name = self._name
+ self._name = self._name + suffix
+ self.open(use_dir)
+ self._name = tmp_name
+ if self._old_suffix is not None:
+ os.remove(os.path.join(use_dir, self._old_suffix))
+ self._old_suffix = self._name + suffix
+
+
+class IndexStoreListDict(IndexStoreBase):
+ """Used when both a list and a dictionary are needed to
+ store the information. Used for bidirectional lookup when
+ one item is an int (an id) and the other is not (an entity). It
+ maintains a list of empty spots in the list so that adding entities
+ can take advantage of unused space. It encodes empty space as a blank
+ line in the file format and '' in the internal list.
+ """
+
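+ # For illustration (hypothetical contents): a store holding ids 0
+ # and 2, with id 1 free, would have
+ # _list = ["devel", "", "driver"]
+ # _list_of_empties = [1]
+ # and the backing file would contain a blank line for the free id.
+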
+ def __init__(self, file_name, build_function=None):
+ IndexStoreBase.__init__(self, file_name)
+ self._list = []
+ self._dict = {}
+ self._next_id = 0
+ self._list_of_empties = []
+ self._build_func = build_function
+ self._line_cnt = 0
+
+ def add_entity(self, entity, is_empty):
+ """Adds an entity consistently to the list and dictionary
+ allowing bidirectional lookup.
+ """
+ assert (len(self._list) == self._next_id)
+ if self._list_of_empties and not is_empty:
+ use_id = self._list_of_empties.pop(0)
+ assert use_id <= len(self._list)
+ if use_id == len(self._list):
+ self._list.append(entity)
+ self._next_id += 1
+ else:
+ self._list[use_id] = entity
+ else:
+ use_id = self._next_id
+ self._list.append(entity)
+ self._next_id += 1
+ if not is_empty:
+ self._dict[entity] = use_id
+ assert (len(self._list) == self._next_id)
+ return use_id
+
+ def remove_id(self, in_id):
+ """deletes in_id from the list and the dictionary """
+ entity = self._list[in_id]
+ self._list[in_id] = ""
+ self._dict[entity] = ""
+
+ def remove_entity(self, entity):
+ """deletes the entity from the list and the dictionary """
+ in_id = self._dict[entity]
+ self._dict[entity] = ""
+ self._list[in_id] = ""
+
+ def get_id(self, entity):
+ """returns the id of entity """
+ return self._dict[entity]
+
+ def get_id_and_add(self, entity):
+ """Adds entity if it's not previously stored and returns the
+ id for entity.
+ """
+ # This code purposefully reimplements add_entity
+ # code. Replacing the function calls to has_entity, add_entity,
+ # and get_id with direct access to the data structure gave a
+ # speed up of a factor of 4. Because this is a very hot path,
+ # the tradeoff seemed appropriate.
+
+ if not self._dict.has_key(entity):
+ assert (len(self._list) == self._next_id)
+ if self._list_of_empties:
+ use_id = self._list_of_empties.pop(0)
+ assert use_id <= len(self._list)
+ if use_id == len(self._list):
+ self._list.append(entity)
+ self._next_id += 1
+ else:
+ self._list[use_id] = entity
+ else:
+ use_id = self._next_id
+ self._list.append(entity)
+ self._next_id += 1
+ self._dict[entity] = use_id
+ assert (len(self._list) == self._next_id)
+ return self._dict[entity]
+
+ def get_entity(self, in_id):
+ """return the entity in_id maps to """
+ return self._list[in_id]
+
+ def has_entity(self, entity):
+ """check if entity is in storage """
+ return self._dict.has_key(entity)
+
+ def has_empty(self):
+ """Check if the structure has any empty elements which
+ can be filled with data.
+ """
+ return (len(self._list_of_empties) > 0)
+
+ def get_next_empty(self):
+ """returns the next id which maps to no element """
+ return self._list_of_empties.pop()
+
+ def write_dict_file(self, path, version_num):
+ """Passes self._list to the parent class to write to a file.
+ """
+ IndexStoreBase._protected_write_dict_file(self, path,
+ version_num,
+ self._list)
+
+ def read_dict_file(self):
+ """Reads in a dictionary previously stored using the above
+ call
+ """
+ assert self._file_handle
+ if self.should_reread():
+ self._dict.clear()
+ self._list = []
+ for i, line in enumerate(self._file_handle):
+ # A blank line means that id can be reused.
+ tmp = line.rstrip('\n')
+ if line == '\n':
+ self._list_of_empties.append(i)
+ else:
+ if self._build_func:
+ tmp = self._build_func(tmp)
+ self._dict[tmp] = i
+ self._list.append(tmp)
+ self._line_cnt = i + 1
+ self._next_id = i + 1
+ return self._line_cnt
+
+ def count_entries_removed_during_partial_indexing(self):
+ """Returns the number of entries removed during a second phase
+ of indexing.
+ """
+ return len(self._list)
+
+class IndexStoreDict(IndexStoreBase):
+ """Class used when only entity -> id lookup is needed
+ """
+
+ def __init__(self, file_name):
+ IndexStoreBase.__init__(self, file_name)
+ self._dict = {}
+ self._next_id = 0
+
+ def get_dict(self):
+ return self._dict
+
+ def get_entity(self, in_id):
+ return self._dict[in_id]
+
+ def has_entity(self, entity):
+ return self._dict.has_key(entity)
+
+ def read_dict_file(self):
+ """Reads in a dictionary stored in line number -> entity
+ format
+ """
+ if self.should_reread():
+ self._dict.clear()
+ for line_cnt, line in enumerate(self._file_handle):
+ line = line.rstrip('\n')
+ self._dict[line_cnt] = line
+
+ def matching_read_dict_file(self, in_set):
+ """If it's necessary to reread the file, it rereads the
+ file. It matches the line it reads against the contents of
+ in_set. If a match is found, the entry on the line is stored
+ for later use, otherwise the line is skipped. When all items
+ in in_set have been matched, the method is done and returns.
+ """
+ if self.should_reread():
+ self._dict.clear()
+ match_cnt = 0
+ max_match = len(in_set)
+ for i, line in enumerate(self._file_handle):
+ if i in in_set:
+ match_cnt += 1
+ line = line.rstrip('\n')
+ self._dict[i] = line
+ if match_cnt >= max_match:
+ break
+
+ def count_entries_removed_during_partial_indexing(self):
+ """Returns the number of entries removed during a second phase
+ of indexing.
+ """
+ return len(self._dict)
+
+class IndexStoreDictMutable(IndexStoreBase):
+ """Dictionary which allows dynamic update of its storage
+ """
+
+ def __init__(self, file_name):
+ IndexStoreBase.__init__(self, file_name)
+ self._dict = {}
+
+ def get_dict(self):
+ return self._dict
+
+ def has_entity(self, entity):
+ return self._dict.has_key(entity)
+
+ def get_id(self, entity):
+ return self._dict[entity]
+
+ def read_dict_file(self):
+ """Reads in a dictionary stored in with an entity
+ and its number on each line.
+ """
+ if self.should_reread():
+ self._dict.clear()
+ for line in self._file_handle:
+ res = line.split()
+ token = res[0]
+ offset = int(res[1])
+ self._dict[token] = offset
+
+ def open_out_file(self, use_dir, version_num):
+ """Opens the output file for this class and prepares it
+ to be written via write_entity.
+ """
+
+ IndexStoreBase._protected_write_dict_file(self, use_dir,
+ version_num, [])
+ self._file_handle = open(os.path.join(use_dir, self._name),
+ 'ab')
+
+ def write_entity(self, entity, my_id):
+ """Writes the entity out to the file with my_id """
+ assert self._file_handle is not None
+ self._file_handle.write(str(entity) + " " + str(my_id) + "\n")
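+ # For example (sketch), write_entity("vim", 10432) appends
+ # the line "vim 10432", which read_dict_file above parses
+ # back into self._dict["vim"] = 10432.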
+
+ def write_dict_file(self, path, version_num):
+ """ Generates an iterable list of string representations of
+ the dictionary that the parent's protected_write_dict_file
+ function can call.
+ """
+ IndexStoreBase._protected_write_dict_file(self, path,
+ version_num, [])
+
+ def count_entries_removed_during_partial_indexing(self):
+ """Returns the number of entries removed during a second phase
+ of indexing.
+ """
+ return 0
+
+class IndexStoreSet(IndexStoreBase):
+ """Used when only set membership is desired.
+ This is currently designed for exclusive use
+ with storage of fmri.PkgFmris. However, that impact
+ is only seen in the read_and_discard_matching_from_argument
+ method.
+ """
+ def __init__(self, file_name):
+ IndexStoreBase.__init__(self, file_name)
+ self._set = set()
+
+ def get_set(self):
+ return self._set
+
+ def add_entity(self, entity):
+ self._set.add(entity)
+
+ def remove_entity(self, entity):
+ """Remove entity purposfully assumes that entity is
+ already in the set to be removed. This is useful for
+ error checking and debugging.
+ """
+ self._set.remove(entity)
+
+ def has_entity(self, entity):
+ return (entity in self._set)
+
+ def write_dict_file(self, path, version_num):
+ """Write each member of the set out to a line in a file """
+ IndexStoreBase._protected_write_dict_file(self, path,
+ version_num, self._set)
+
+ def read_dict_file(self):
+ """Process a dictionary file written using the above method
+ """
+ assert self._file_handle
+ res = 0
+ if self.should_reread():
+ self._set.clear()
+ for i, line in enumerate(self._file_handle):
+ line = line.rstrip('\n')
+ assert i == len(self._set)
+ self.add_entity(line)
+ res = i + 1
+ return res
+
+ def read_and_discard_matching_from_argument(self, fmri_set):
+ """Reads the file and removes all frmis in the file
+ from fmri_set.
+ """
+ if self._file_handle:
+ for line in self._file_handle:
+ f = fmri.PkgFmri(line)
+ fmri_set.discard(f)
+
+ def count_entries_removed_during_partial_indexing(self):
+ """Returns the number of entries removed during a second phase
+ of indexing."""
+ return len(self._set)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/server/catalog.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,255 @@
+#!/usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+import subprocess
+import threading
+import signal
+import os
+import sys
+import cherrypy
+
+import pkg.catalog as catalog
+import pkg.indexer as indexer
+import pkg.server.query_engine as query_e
+
+from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
+from pkg.misc import emsg
+
+class ServerCatalog(catalog.Catalog):
+ """The catalog information which is only needed by the server."""
+
+ def __init__(self, cat_root, authority = None, pkg_root = None,
+ read_only = False, index_root = None, repo_root = None,
+ rebuild = True):
+
+ self.index_root = index_root
+ self.repo_root = repo_root
+
+ # The searchdb_update_handle lock protects the
+ # searchdb_update_handle variable. This allows the handle to be
+ # checked and acted on in a consistent step, preventing needed
+ # updates from being dropped. The check at the top of
+ # refresh_index should always be done prior to deciding to spin
+ # off an indexing process, as it prevents more than one indexing
+ # process from running at the same time.
+ self.searchdb_update_handle_lock = threading.Lock()
+
+ if self.index_root:
+ self.query_engine = \
+ query_e.ServerQueryEngine(self.index_root)
+
+ if os.name == 'posix':
+ try:
+ signal.signal(signal.SIGCHLD,
+ self.child_handler)
+ except ValueError:
+ emsg("Tried to create signal handler in "
+ "a thread other than the main thread")
+
+ self.searchdb_update_handle = None
+ self._search_available = False
+ self.deferred_searchdb_updates = []
+ self.deferred_searchdb_updates_lock = threading.Lock()
+
+ self.refresh_again = False
+
+ catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
+ read_only, rebuild)
+
+ def whence(self, cmd):
+ if cmd[0] != '/':
+ tmp_cmd = cmd
+ cmd = None
+ path = os.environ['PATH'].split(':')
+ path.append(os.environ['PWD'])
+ for p in path:
+ if os.path.exists(os.path.join(p, tmp_cmd)):
+ cmd = os.path.join(p, tmp_cmd)
+ break
+ assert cmd
+ return cmd
+
+ def refresh_index(self):
+ """ This function refreshes the search indexes if there any new
+ packages. It starts a subprocess which results in a call to
+ run_update_index (see below) which does the actual update.
+ """
+
+ self.searchdb_update_handle_lock.acquire()
+
+ if self.searchdb_update_handle:
+ self.refresh_again = True
+ self.searchdb_update_handle_lock.release()
+ return
+
+ try:
+ fmris_to_index = set(self.fmris())
+
+ indexer.Indexer.check_for_updates(self.index_root,
+ fmris_to_index)
+
+ if fmris_to_index:
+ if os.name == 'posix':
+ cmd = self.whence(sys.argv[0])
+ args = (cmd, "--refresh-index", "-d",
+ self.repo_root)
+ try:
+ self.searchdb_update_handle = \
+ subprocess.Popen(args,
+ stderr = \
+ subprocess.STDOUT)
+ except Exception, e:
+ emsg("Starting the indexing "
+ "process failed")
+ raise
+ else:
+ self.run_update_index()
+ else:
+ # Since there is nothing to index, set up
+ # the index and declare search available.
+ # We only log this if it represents a
+ # change in the server's status.
+ ind = indexer.Indexer(self.index_root,
+ SERVER_DEFAULT_MEM_USE_KB)
+ ind.setup()
+ if not self._search_available:
+ cherrypy.log("Search Available",
+ "INDEX")
+ self._search_available = True
+ finally:
+ self.searchdb_update_handle_lock.release()
+
+ def run_update_index(self):
+ """ Determines which fmris need to be indexed and passes them
+ to the indexer.
+
+ Note: Only one instance of this method should be running.
+ External locking is expected to ensure this behavior. Calling
+ refresh_index is the preferred method to use to reindex.
+ """
+ fmris_to_index = set(self.fmris())
+
+ indexer.Indexer.check_for_updates(self.index_root,
+ fmris_to_index)
+
+ if fmris_to_index:
+ self.__update_searchdb_unlocked(fmris_to_index)
+ else:
+ ind = indexer.Indexer(self.index_root,
+ SERVER_DEFAULT_MEM_USE_KB)
+ ind.setup()
+
+ def build_catalog(self):
+ """ Creates an Indexer instance and after building the
+ catalog, refreshes the index.
+ """
+ ind = indexer.Indexer(self.index_root, SERVER_DEFAULT_MEM_USE_KB)
+ if ind.check_index_existence():
+ self._search_available = True
+ cherrypy.log("Search Available", "INDEX")
+ catalog.Catalog.build_catalog(self)
+ # refresh_index doesn't use file modification times
+ # to determine which packages need to be indexed, so use
+ # it to reindex if it's needed.
+ self.refresh_index()
+
+ def child_handler(self, sig, frame):
+ """ Handler method for the SIGCLD signal. Checks to see if the
+ search database update child has finished, and enables searching
+ if it finished successfully, or logs an error if it didn't.
+ """
+ try:
+ signal.signal(signal.SIGCHLD, self.child_handler)
+ except ValueError:
+ emsg("Tried to create signal handler in "
+ "a thread other than the main thread")
+ # If there's no update_handle, then another subprocess was
+ # spun off and that was what finished. If the poll() returns
+ # None, then while the indexer was running, another process
+ # that was spun off finished.
+ rc = None
+ if not self.searchdb_update_handle:
+ return
+ rc = self.searchdb_update_handle.poll()
+ if rc == None:
+ return
+
+ if rc == 0:
+ self._search_available = True
+ cherrypy.log("Search indexes updated and available.",
+ "INDEX")
+ # Need to acquire this lock to prevent the possibility
+ # of a race condition with refresh_index where a needed
+ # refresh is dropped. It is possible that an extra
+ # refresh will be done with this code, but that refresh
+ # should be very quick to finish.
+ self.searchdb_update_handle_lock.acquire()
+ self.searchdb_update_handle = None
+ self.searchdb_update_handle_lock.release()
+
+ if self.refresh_again:
+ self.refresh_again = False
+ self.refresh_index()
+ elif rc > 0:
+ # XXX This should be logged instead
+ # If the refresh of the index failed, defensively
+ # declare that search is unavailable.
+ self._search_available = False
+                        emsg(_("ERROR building search database, rc: %s") % rc)
+                        # stderr was merged into stdout when the subprocess
+                        # was started, so a separate stderr pipe may not exist.
+                        if self.searchdb_update_handle.stderr:
+                                emsg(self.searchdb_update_handle.stderr.read())
+
+ def __update_searchdb_unlocked(self, fmri_list):
+                """ Takes a list of fmris and calls the indexer with a list of
+                (fmri, manifest file path) pairs. It assumes that all needed
+                locking has already occurred.
+ """
+ assert self.index_root
+ fmri_manifest_list = []
+
+                # Rather than reading each manifest here, simply pass
+                # along its file path and have the indexer take care of
+                # opening and reading it. Since the indexer processes
+                # and discards the manifest structure (and its search
+                # dictionary, for that matter), this is much more
+                # memory efficient.
+
+ for f in fmri_list:
+ mfst_path = os.path.join(self.pkg_root,
+ f.get_dir_path())
+ fmri_manifest_list.append((f, mfst_path))
+
+ if fmri_manifest_list:
+ index_inst = indexer.Indexer(self.index_root,
+ SERVER_DEFAULT_MEM_USE_KB)
+ index_inst.server_update_index(fmri_manifest_list)
+
+ def search(self, token):
+                """Search through the search database for 'token'. Return a
+                list of (index, action, value, package) results."""
+ assert self.index_root
+ if not self.query_engine:
+ self.query_engine = \
+ query_e.ServerQueryEngine(self.index_root)
+ query = query_e.Query(token)
+ return self.query_engine.search(query)
--- a/src/modules/server/config.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/server/config.py Fri Jul 25 13:56:38 2008 -0700
@@ -30,7 +30,7 @@
import statvfs
import shutil
-import pkg.catalog as catalog
+import pkg.server.catalog as catalog
import pkg.updatelog as updatelog
import pkg.server.transaction as trans
@@ -87,23 +87,27 @@
os.makedirs(self.cat_root)
if not os.path.exists(self.update_root):
os.makedirs(self.update_root)
+ if not os.path.exists(self.index_root):
+ os.makedirs(self.index_root)
if os.path.exists(self.trans_root) and \
os.path.exists(self.file_root) and \
os.path.exists(self.pkg_root) and \
os.path.exists(self.cat_root) and \
- os.path.exists(self.update_root):
+ os.path.exists(self.update_root) and \
+ os.path.exists(self.index_root):
return
raise RuntimeError, emsg
def set_repo_root(self, root):
self.repo_root = root
- self.trans_root = "%s/trans" % self.repo_root
- self.file_root = "%s/file" % self.repo_root
- self.pkg_root = "%s/pkg" % self.repo_root
- self.cat_root = "%s/catalog" % self.repo_root
- self.update_root = "%s/updatelog" % self.repo_root
+ self.trans_root = os.path.join(self.repo_root, "trans")
+ self.file_root = os.path.join(self.repo_root, "file")
+ self.pkg_root = os.path.join(self.repo_root, "pkg")
+ self.cat_root = os.path.join(self.repo_root, "catalog")
+ self.update_root = os.path.join(self.repo_root, "updatelog")
+ self.index_root = os.path.join(self.repo_root, "index")
def set_read_only(self):
self.read_only = True
@@ -124,12 +128,14 @@
self.in_flight_trans[t.get_basename()] = t
- def acquire_catalog(self):
+ def acquire_catalog(self, rebuild=True):
"""Tell the catalog to set itself up. Associate an
instance of the catalog with this depot."""
- self.catalog = catalog.Catalog(self.cat_root,
- pkg_root = self.pkg_root, read_only = self.read_only)
+ self.catalog = catalog.ServerCatalog(self.cat_root,
+ pkg_root=self.pkg_root, read_only=self.read_only,
+ index_root=self.index_root, repo_root=self.repo_root,
+ rebuild=rebuild)
# UpdateLog allows server to issue incremental catalog updates
self.updatelog = updatelog.UpdateLog(self.update_root,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/server/query_engine.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,87 @@
+#!/usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+import threading
+
+import pkg.search_storage as ss
+import pkg.search_errors as search_errors
+import pkg.query_engine as qe
+
+class Query(qe.Query):
+ """ The class which handles all query parsing and representation. """
+ # The empty class is present to allow consumers to import a single
+ # query engine module rather than have to import the client/server
+ # one as well as the base one.
+ pass
+
+
+class ServerQueryEngine(qe.QueryEngine):
+ """ This class contains the data structures and methods needed to
+ perform search on the indexes created by Indexer.
+ """
+ def __init__(self, dir_path):
+
+ # A lock that ensures that only one thread may be modifying
+ # the internal dictionaries at any given time.
+ self.dict_lock = threading.Lock()
+ self.dict_lock.acquire()
+
+ qe.QueryEngine.__init__(self, dir_path)
+
+ try:
+ if self._open_dicts(False):
+ try:
+ for d in self._data_dict.values():
+ if d == self._data_main_dict:
+ continue
+ d.read_dict_file()
+ finally:
+ for d in self._data_dict.values():
+ d.close_file_handle()
+ finally:
+ self.dict_lock.release()
+
+ def _read_dicts(self):
+ for d in self._data_dict.values():
+ if d == self._data_main_dict:
+ continue
+ d.read_dict_file()
+
+ def search(self, query):
+ """ Searches the indexes in dir_path for any matches of query
+ and returns the results.
+ """
+
+ self.dict_lock.acquire()
+ try:
+ self._open_dicts()
+ try:
+ self._read_dicts()
+ _, res_ids = self.search_internal(query)
+ finally:
+ self._close_dicts()
+ return self.get_results(res_ids)
+ finally:
+ self.dict_lock.release()
+
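For reference, a minimal sketch of driving this engine directly; the index
path here is hypothetical, and in the depot a single ServerQueryEngine
instance is created lazily by the catalog and reused across requests, with
dict_lock serializing concurrent searches:

        import pkg.server.query_engine as query_e

        # Hypothetical index directory; the depot stores its indexes
        # under $REPO_ROOT/index.
        engine = query_e.ServerQueryEngine("/my/repo/index")
        for result in engine.search(query_e.Query("gmake")):
                print result
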
--- a/src/modules/server/repository.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/server/repository.py Fri Jul 25 13:56:38 2008 -0700
@@ -44,7 +44,7 @@
import urllib
-import pkg.catalog as catalog
+import pkg.server.catalog as catalog
import pkg.fmri as fmri
import pkg.misc as misc
import pkg.Uuid25 as uuid
@@ -203,16 +203,16 @@
raise cherrypy.HTTPError(httplib.SERVICE_UNAVAILABLE,
"Search temporarily unavailable")
- try:
- res = self.scfg.catalog.search(token)
- except KeyError:
- raise cherrypy.HTTPError(httplib.NOT_FOUND)
+ res = self.scfg.catalog.search(token)
response.headers['Content-type'] = 'text/plain'
output = ""
+        # The query engine returns four pieces of information per result
+        # (index, action, value, and package, in that order). Join them
+        # into a line the client can parse.
for l in res:
- output += ("%s %s\n" % (l[0], l[1]))
+ output += ("%s %s %s %s\n" % (l[0], l[1], l[2], l[3]))
return output
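A client consuming this response can recover the four fields of each result
with a whitespace split. A minimal sketch, using a response line taken from
the test suite below:

        # Each result line carries: index, action, value, package.
        response = "basename file bin/example_path pkg:/[email protected]\n"
        for line in response.splitlines():
                index, action, value, package = line.split()
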
@@ -330,17 +330,17 @@
yield ""
- # We have to configure the headers either through the _cp_config
- # namespace, or inside the function itself whenever we are using
- # a streaming generator. This is because headers have to be setup
- # before the response even begins and the point at which @tools
- # hooks in is too late.
+ # We have to configure the headers either through the _cp_config
+ # namespace, or inside the function itself whenever we are using
+ # a streaming generator. This is because headers have to be setup
+ # before the response even begins and the point at which @tools
+ # hooks in is too late.
filelist_0._cp_config = {
- 'response.stream': True,
- 'tools.response_headers.on': True,
- 'tools.response_headers.headers': [('Content-Type',
- 'application/data')]
- }
+ 'response.stream': True,
+ 'tools.response_headers.on': True,
+ 'tools.response_headers.headers': [('Content-Type',
+ 'application/data')]
+ }
@cherrypy.expose
def rename_0(self, *tokens, **params):
--- a/src/modules/server/transaction.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/server/transaction.py Fri Jul 25 13:56:38 2008 -0700
@@ -302,6 +302,8 @@
self.publish_package()
self.cfg.updatelog.add_package(self.fmri, self.critical)
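+                # Refresh the search index now that the package is in the
+                # catalog; on posix systems the reindex may run in a
+                # background subprocess (see ServerCatalog.refresh_index).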
+ self.cfg.catalog.refresh_index()
+
return ("%s" % self.fmri, "PUBLISHED")
def publish_package(self):
@@ -328,10 +330,6 @@
portable.rename("%s/manifest" % self.dir, "%s/%s" %
(pkgdir, urllib.quote(str(fmri.version), "")))
- # update search index
- cfg.catalog.update_searchdb([os.path.join(
- cfg.pkg_root, fmri.get_dir_path()).rsplit('/', 1)])
-
# Move each file to file_root, with appropriate directory
# structure.
for f in os.listdir(self.dir):
--- a/src/modules/version.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/modules/version.py Fri Jul 25 13:56:38 2008 -0700
@@ -61,6 +61,9 @@
def __str__(self):
return ".".join(map(str, self))
+ def __hash__(self):
+ return hash(tuple(self))
+
def is_subsequence(self, other):
"""Return true if self is a "subsequence" of other, meaning that
other and self have identical components, up to the length of
@@ -284,6 +287,9 @@
return 1
return 0
+ def __hash__(self):
+ return hash((self.release, self.branch, self.timestr))
+
def is_successor(self, other, constraint):
"""Evaluate true if self is a successor version to other.
--- a/src/tests/cli-complete.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/tests/cli-complete.py Fri Jul 25 13:56:38 2008 -0700
@@ -50,6 +50,7 @@
import cli.t_depotcontroller
import cli.t_image_create
import cli.t_info_contents
+ import cli.t_search
import cli.t_pkg_install_basics
import cli.t_pkg_install_corrupt_image
import cli.t_pkgsend
@@ -76,7 +77,8 @@
cli.t_circular_dependencies.TestCircularDependencies,
cli.t_recv.TestPkgRecv,
cli.t_rename.TestRename,
- cli.t_twodepot.TestTwoDepots ]
+ cli.t_twodepot.TestTwoDepots,
+ cli.t_search.TestPkgSearch ]
for t in tests:
all_suite.addTest(unittest.makeSuite(t, 'test'))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/tests/cli/t_search.py Fri Jul 25 13:56:38 2008 -0700
@@ -0,0 +1,392 @@
+#!/usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+
+import testutils
+if __name__ == "__main__":
+ testutils.setup_environment("../../../proto")
+
+import os
+import unittest
+import shutil
+import copy
+
+import pkg.depotcontroller as dc
+
+class TestPkgSearch(testutils.SingleDepotTestCase):
+
+ example_pkg10 = """
+ open [email protected],5.11-0
+ add dir mode=0755 owner=root group=bin path=/bin
+ add dir mode=0755 owner=root group=bin path=/bin/example_dir
+ add file /tmp/example_file mode=0555 owner=root group=bin path=/bin/example_path
+ add set name=com.sun.service.incorporated_changes value="6556919 6627937"
+ add set name=com.sun.service.random_test value=42 value=79
+ add set name=com.sun.service.bug_ids value="4641790 4725245 4817791 4851433 4897491 4913776 6178339 6556919 6627937"
+ add set name=com.sun.service.keywords value="sort null -n -m -t sort 0x86 separator"
+ add set name=com.sun.service.info_url value=http://service.opensolaris.com/xml/pkg/[email protected],5.11-1:20080514I120000Z
+ close """
+
+ example_pkg11 = """
+ open [email protected],5.11-0
+ add dir mode=0755 owner=root group=bin path=/bin
+ add file /tmp/example_file mode=0555 owner=root group=bin path=/bin/example_path11
+ close """
+
+ headers = "INDEX ACTION VALUE PACKAGE\n"
+
+ res_remote_path = set([
+ headers,
+ "basename file bin/example_path pkg:/[email protected]\n"
+ ])
+
+ res_remote_bin = set([
+ headers,
+ "path dir bin pkg:/[email protected]\n"
+ ])
+
+ res_remote_bug_id = set([
+ headers,
+ "com.sun.service.bug_ids set 4851433 pkg:/[email protected]\n"
+
+ ])
+
+ res_remote_inc_changes = set([
+ headers,
+ "com.sun.service.incorporated_changes set 6556919 pkg:/[email protected]\n",
+ "com.sun.service.bug_ids set 6556919 pkg:/[email protected]\n"
+
+ ])
+
+ res_remote_random_test = set([
+ headers,
+ "com.sun.service.random_test set 42 pkg:/[email protected]\n"
+ ])
+
+ res_remote_keywords = set([
+ headers,
+ "com.sun.service.keywords set separator pkg:/[email protected]\n"
+ ])
+
+ res_remote_wildcard = set([
+ headers,
+ "basename file bin/example_path pkg:/[email protected]\n",
+ "basename dir bin/example_dir pkg:/[email protected]\n"
+ ])
+
+ res_remote_glob = set([
+ headers,
+ "basename file bin/example_path pkg:/[email protected]\n",
+ "basename dir bin/example_dir pkg:/[email protected]\n",
+ "path file bin/example_path pkg:/[email protected]\n",
+ "path dir bin/example_dir pkg:/[email protected]\n"
+ ])
+
+ local_fmri_string = \
+ "fmri set fmri pkg:/[email protected]\n"
+
+
+ res_local_pkg = set([
+ headers,
+ local_fmri_string
+ ])
+
+ res_local_path = copy.copy(res_remote_path)
+
+ res_local_bin = copy.copy(res_remote_bin)
+
+ res_local_bug_id = copy.copy(res_remote_bug_id)
+
+ res_local_inc_changes = copy.copy(res_remote_inc_changes)
+
+ res_local_random_test = copy.copy(res_remote_random_test)
+
+ res_local_keywords = copy.copy(res_remote_keywords)
+
+ res_local_wildcard = copy.copy(res_remote_wildcard)
+ res_local_wildcard.add(local_fmri_string)
+
+ res_local_glob = copy.copy(res_remote_glob)
+ res_local_glob.add(local_fmri_string)
+
+ res_local_path_example11 = set([
+ headers,
+ "basename file bin/example_path11 pkg:/[email protected]\n"
+ ])
+
+ res_local_bin_example11 = set([
+ headers,
+ "path dir bin pkg:/[email protected]\n"
+ ])
+
+ res_local_wildcard_example11 = set([
+ headers,
+ "basename file bin/example_path11 pkg:/[email protected]\n",
+ "fmri set fmri pkg:/[email protected]\n"
+ ])
+
+ res_local_pkg_example11 = set([
+ headers,
+ "fmri set fmri pkg:/[email protected]\n"
+ ])
+
+
+ misc_files = ['/tmp/example_file']
+
+        # This is a copy of the 3.81%2C5.11-0.89%3A20080527T163123Z version of
+        # SUNWgmake from ipkg with the file and license actions changed so
+        # that they all use /tmp/example_file when sending.
+ bug_983_manifest = """
+open [email protected],5.11-0.89
+add dir group=sys mode=0755 owner=root path=usr
+add dir group=bin mode=0755 owner=root path=usr/bin
+add dir group=bin mode=0755 owner=root path=usr/gnu
+add dir group=bin mode=0755 owner=root path=usr/gnu/bin
+add link path=usr/gnu/bin/make target=../../bin/gmake
+add dir group=sys mode=0755 owner=root path=usr/gnu/share
+add dir group=bin mode=0755 owner=root path=usr/gnu/share/man
+add dir group=bin mode=0755 owner=root path=usr/gnu/share/man/man1
+add link path=usr/gnu/share/man/man1/make.1 target=../../../../share/man/man1/gmake.1
+add dir group=bin mode=0755 owner=root path=usr/sfw
+add dir group=bin mode=0755 owner=root path=usr/sfw/bin
+add link path=usr/sfw/bin/gmake target=../../bin/gmake
+add dir group=bin mode=0755 owner=root path=usr/sfw/share
+add dir group=bin mode=0755 owner=root path=usr/sfw/share/man
+add dir group=bin mode=0755 owner=root path=usr/sfw/share/man/man1
+add link path=usr/sfw/share/man/man1/gmake.1 target=../../../../share/man/man1/gmake.1
+add dir group=sys mode=0755 owner=root path=usr/share
+add dir group=bin mode=0755 owner=root path=usr/share/info
+add dir group=bin mode=0755 owner=root path=usr/share/man
+add dir group=bin mode=0755 owner=root path=usr/share/man/man1
+add file /tmp/example_file elfarch=i386 elfbits=32 elfhash=68cca393e816e6adcbac1e8ffe9c618de70413e0 group=bin mode=0555 owner=root path=usr/bin/gmake pkg.size=153036
+add file /tmp/example_file group=bin mode=0444 owner=root path=usr/share/info/make.info pkg.size=5442
+add file /tmp/example_file group=bin mode=0444 owner=root path=usr/share/info/make.info-1 pkg.size=301265
+add file /tmp/example_file group=bin mode=0444 owner=root path=usr/share/info/make.info-2 pkg.size=221686
+add file /tmp/example_file group=bin mode=0444 owner=root path=usr/share/man/man1/gmake.1 pkg.size=10740
+add license /tmp/example_file license=SUNWgmake.copyright pkg.size=18043 transaction_id=1211931083_pkg%3A%2FSUNWgmake%403.81%2C5.11-0.89%3A20080527T163123Z
+add depend fmri=pkg:/[email protected] type=require
+add set name=description value="gmake - GNU make"
+add legacy arch=i386 category=system desc="GNU make - A utility used to build software (gmake) 3.81" hotline="Please contact your local service provider" name="gmake - GNU make" pkg=SUNWgmake vendor="Sun Microsystems, Inc." version=11.11.0,REV=2008.04.29.02.08
+close
+"""
+
+ res_bug_983 = set([
+ headers,
+ "basename link usr/sfw/bin/gmake pkg:/[email protected]\n",
+ "basename file usr/bin/gmake pkg:/[email protected]\n",
+ "description set gmake pkg:/[email protected]\n"
+
+ ])
+
+
+ def setUp(self):
+ for p in self.misc_files:
+ f = open(p, "w")
+ # Write the name of the file into the file, so that
+ # all files have differing contents.
+ f.write(p + "\n")
+ f.close()
+ testutils.SingleDepotTestCase.setUp(self)
+ tp = self.get_test_prefix()
+ self.testdata_dir = os.path.join(tp, "search_results")
+ os.mkdir(self.testdata_dir)
+
+ def tearDown(self):
+ testutils.SingleDepotTestCase.tearDown(self)
+ for p in self.misc_files:
+ os.remove(p)
+ shutil.rmtree(self.testdata_dir)
+
+ def _check(self, proposed_answer, correct_answer):
+ if correct_answer == proposed_answer:
+ return True
+ else:
+ print "Proposed Answer: " + str(proposed_answer)
+ print "Correct Answer : " + str(correct_answer)
+ if isinstance(correct_answer, set) and \
+ isinstance(proposed_answer, set):
+ print "Missing: " + str(correct_answer -
+ proposed_answer)
+ print "Extra : " + str(proposed_answer -
+ correct_answer)
+ assert correct_answer == proposed_answer
+
+ def _search_op(self, remote, token, test_value):
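+                # Run pkg search (adding -r for remote), capture its output,
+                # and compare the set of result lines to the expected set.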
+ outfile = os.path.join(self.testdata_dir, "res")
+ if remote:
+ token = "-r " + token
+ self.pkg("search " + token + " > " + outfile)
+ res_list = (open(outfile, "rb")).readlines()
+ self._check(set(res_list), test_value)
+
+ def _run_remote_tests(self):
+                # Exit status 1 is expected since a search by package name
+                # can't currently succeed: the name is only set inside the
+                # manifest at install time on the client side.
+ self.pkg("search -r example_pkg", exit=1)
+
+ self._search_op(True, "example_path", self.res_remote_path)
+ self._search_op(True, "example*", self.res_remote_wildcard)
+ self._search_op(True, "/bin", self.res_remote_bin)
+ self._search_op(True, "4851433", self.res_remote_bug_id)
+ self._search_op(True, "6556919", self.res_remote_inc_changes)
+ self._search_op(True, "42", self.res_remote_random_test)
+ self._search_op(True, "separator", self.res_remote_keywords)
+ self._search_op(True, "*example*", self.res_remote_glob)
+
+ # These tests are included because a specific bug
+                # was found during development. These tests prevent regression
+                # back to that bug. Exit status of 1 is expected because the
+ # token isn't in the packages.
+ self.pkg("search -r a_non_existent_token", exit=1)
+ self.pkg("search -r a_non_existent_token", exit=1)
+
+ def test_remote(self):
+ """Test remote search."""
+ durl = self.dc.get_depot_url()
+ self.pkgsend_bulk(durl, self.example_pkg10)
+
+ self.image_create(durl)
+ self._run_remote_tests()
+
+ def _run_local_tests(self):
+
+ # This finds something because the client side
+ # manifest has had the name of the package inserted
+ # into it.
+
+ self._search_op(False, "example_pkg", self.res_local_pkg)
+ self._search_op(False, "example_path", self.res_local_path)
+ self._search_op(False, "example*", self.res_local_wildcard)
+ self._search_op(False, "/bin", self.res_local_bin)
+ self._search_op(False, "4851433", self.res_local_bug_id)
+ self._search_op(False, "6556919", self.res_local_inc_changes)
+ self._search_op(False, "42", self.res_local_random_test)
+ self._search_op(False, "separator", self.res_local_keywords)
+ self._search_op(False, "*example*", self.res_local_glob)
+
+ # These tests are included because a specific bug
+ # was found during development. These tests prevent regression
+ # back to that bug. Exit status of 1 is expected because the
+ # token isn't in the packages.
+ self.pkg("search a_non_existent_token", exit=1)
+ self.pkg("search a_non_existent_token", exit=1)
+
+ def _run_local_tests_example11_installed(self):
+
+ # This finds something because the client side
+ # manifest has had the name of the package inserted
+ # into it.
+
+ self._search_op(False, "example_pkg", self.res_local_pkg_example11)
+ self.pkg("search example_path", exit=1)
+ self._search_op(False, "example_path11", self.res_local_path_example11)
+ self._search_op(False, "example*", self.res_local_wildcard_example11)
+ self._search_op(False, "/bin", self.res_local_bin_example11)
+
+ def _run_local_empty_tests(self):
+ self.pkg("search example_pkg", exit=1)
+ self.pkg("search example_path", exit=1)
+ self.pkg("search example*", exit=1)
+ self.pkg("search /bin", exit=1)
+
+ def test_local(self):
+ """Install one package, and run the search suite."""
+ durl = self.dc.get_depot_url()
+ self.pkgsend_bulk(durl, self.example_pkg10)
+
+ self.image_create(durl)
+
+ self.pkg("install example_pkg")
+
+ self._run_local_tests()
+
+ def test_repeated_install_uninstall(self):
+                """Install and uninstall a package repeatedly, checking
+                search after each change to the image."""
+                # During development, the index could become corrupted by
+                # repeatedly installing and uninstalling a package. This
+                # test guards against regressing to that state.
+ repeat = 3
+
+ durl = self.dc.get_depot_url()
+ self.pkgsend_bulk(durl, self.example_pkg10)
+ self.image_create(durl)
+
+ self.pkg("install example_pkg")
+ self.pkg("uninstall example_pkg")
+
+ for i in range(1, repeat):
+ self.pkg("install example_pkg")
+ self._run_local_tests()
+ self.pkg("uninstall example_pkg")
+ self._run_local_empty_tests()
+
+ def test_local_image_update(self):
+ """Test that the index gets updated by image-update and
+ that rebuilding the index works after updating the
+ image. Specifically, this tests that rebuilding indexes with
+ gaps in them works correctly."""
+ durl = self.dc.get_depot_url()
+ self.pkgsend_bulk(durl, self.example_pkg10)
+ self.image_create(durl)
+
+ self.pkg("install example_pkg")
+
+ self.pkgsend_bulk(durl, self.example_pkg11)
+
+ self.pkg("image-update")
+
+ self._run_local_tests_example11_installed()
+
+ self.pkg("rebuild-index")
+
+ self._run_local_tests_example11_installed()
+
+ def test_bug_983(self):
+ """Test for known bug 983."""
+ durl = self.dc.get_depot_url()
+ self.pkgsend_bulk(durl, self.bug_983_manifest)
+ self.image_create(durl)
+
+ self._search_op(True, "gmake", self.res_bug_983)
+
+ def test_low_mem(self):
+ """Test to check codepath used in low memory situations."""
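+                # Setting the indexer's RAM cap to zero forces it down its
+                # low-memory codepath on every update.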
+ os.environ["PKG_INDEX_MAX_RAM"] = "0"
+ durl = self.dc.get_depot_url()
+ self.pkgsend_bulk(durl, self.example_pkg10)
+ self.pkgsend_bulk(durl, self.bug_983_manifest)
+ self.pkgsend_bulk(durl, self.bug_983_manifest)
+
+ self.image_create(durl)
+
+ self._run_remote_tests()
+ self._search_op(True, "gmake", self.res_bug_983)
+
+if __name__ == "__main__":
+ unittest.main()
--- a/src/tests/perf/fmribench.py Tue Jul 22 19:36:15 2008 -0500
+++ b/src/tests/perf/fmribench.py Fri Jul 25 13:56:38 2008 -0700
@@ -32,6 +32,19 @@
import sys
benches = [
+ [ "version hash (tstamp)", 1000000,
+ """import pkg.version as version
+f1 = version.Version("5.11-0.72:20070921T203926Z", "0.5.11")""",
+ """hash(f1)"""
+ ],
+
+ [ "version hash (no-tstamp)", 1000000,
+ """import pkg.version as version
+f1 = version.Version("5.11-0.72", "0.5.11")""",
+ """hash(f1)"""
+ ],
+
+
[ "dotsequence creation", 100000,
"""import pkg.version as version""",
"""v1 = version.DotSequence("0.72.1")"""
@@ -204,6 +217,7 @@
"""f1.is_successor(f2)"""
],
+
]
if __name__ == "__main__":