Reduce use of the re module, for performance.
author Danek Duvall <danek.duvall@sun.com>
Fri, 12 Oct 2007 18:05:07 -0700
changeset 146 9b9fcd56b8b2
parent 145 08bee7fd13f6
child 147 548df974fb92
Reduce use of the re module, for performance.
src/depot.py
src/modules/client/image.py
src/modules/client/imageplan.py
src/modules/client/retrieve.py
src/modules/fmri.py
src/modules/manifest.py
src/modules/package.py
src/modules/server/face.py
src/modules/version.py
--- a/src/depot.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/depot.py	Fri Oct 12 18:05:07 2007 -0700
@@ -101,8 +101,7 @@
         scfg.inc_manifest()
 
         # Parse request into FMRI component and decode.
-        m = re.match("^/manifest/\d+/(.*)", request.path)
-        pfmri = urllib.unquote(m.group(1))
+        pfmri = urllib.unquote(request.path.split("/", 3)[-1])
 
         f = fmri.PkgFmri(pfmri, None)
 
@@ -151,8 +150,7 @@
         """The request is the SHA-1 hash name for the file."""
         scfg.inc_file()
 
-        m = re.match("^/file/\d+/(.*)", request.path)
-        fhash = m.group(1)
+        fhash = request.path.split("/", 3)[-1]
 
         try:
                 file = open(os.path.normpath(os.path.join(
@@ -200,8 +198,7 @@
                 return
 
         # Pull transaction ID from headers.
-        m = re.match("^/close/\d+/(.*)", request.path)
-        trans_id = m.group(1)
+        trans_id = request.path.split("/", 3)[-1]
 
         # XXX KeyError?
         t = scfg.in_flight_trans[trans_id]
@@ -214,8 +211,7 @@
                 return
 
         # Pull transaction ID from headers.
-        m = re.match("^/abandon/\d+/(.*)", request.path)
-        trans_id = m.group(1)
+        trans_id = request.path.split("/", 3)[-1]
 
         t = scfg.in_flight_trans[trans_id]
         t.abandon(request)
@@ -226,9 +222,7 @@
                 request.send_error(403, "Read-only server")
                 return
 
-        m = re.match("^/add/\d+/([^/]*)/(.*)", request.path)
-        trans_id = m.group(1)
-        type = m.group(2)
+        trans_id, type = request.path.split("/", 4)[-2:]
 
         t = scfg.in_flight_trans[trans_id]
         t.add_content(request, type)
--- a/src/modules/client/image.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/client/image.py	Fri Oct 12 18:05:07 2007 -0700
@@ -27,7 +27,6 @@
 import cPickle
 import getopt
 import os
-import re
 import urllib
 import urlparse
 # import uuid           # XXX interesting 2.5 module
@@ -217,7 +216,7 @@
 
                 o = self.cfg_cache.authorities[authority]["origin"]
 
-                return re.sub("/+$", "", o)
+                return o.rstrip("/")
 
         def get_default_authority(self):
                 return self.cfg_cache.preferred_authority
--- a/src/modules/client/imageplan.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/client/imageplan.py	Fri Oct 12 18:05:07 2007 -0700
@@ -24,7 +24,6 @@
 # Use is subject to license terms.
 
 import os
-import re
 import urllib
 
 import pkg.catalog as catalog
--- a/src/modules/client/retrieve.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/client/retrieve.py	Fri Oct 12 18:05:07 2007 -0700
@@ -25,7 +25,6 @@
 
 import getopt
 import os
-import re
 import sys
 import urllib
 import urlparse
--- a/src/modules/fmri.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/fmri.py	Fri Oct 12 18:05:07 2007 -0700
@@ -43,58 +43,27 @@
 
         def __init__(self, fmri, build_release):
                 """XXX pkg:/?pkg_name@version not presently supported."""
-                m = re.match("pkg://([^/]*)/([^@]*)@([\dTZ\,\.\-\:]*)", fmri)
-                if m != None:
-                        self.authority = m.group(1)
-                        self.pkg_name = m.group(2)
-                        self.version = Version(m.group(3), build_release)
-
-                        return
+                fmri = fmri.rstrip()
 
-                m = re.match("pkg://([^/]*)/([^@]*)", fmri)
-                if m != None:
-                        self.authority = m.group(1)
-                        self.pkg_name = m.group(2)
-                        self.version = None
-
-                        return
-
-                m = re.match("pkg:/([^/][^@]*)@([\dTZ\,\.\-\:]*)", fmri)
-                if m != None:
-                        # XXX Replace with server's default authority.
-                        self.authority = None
-                        self.pkg_name = m.group(1)
-                        self.version = Version(m.group(2), build_release)
-
-                        return
+                try:
+                        veridx = fmri.rindex("@")
+                        self.version = Version(fmri[veridx + 1:], build_release)
+                except ValueError:
+                        self.version = veridx = None
 
-                m = re.match("pkg:/([^/][^@]*)", fmri)
-                if m != None:
-                        # XXX Replace with server's default authority.
-                        self.authority = None
-                        self.pkg_name = m.group(1)
-                        self.version = None
-
-                        return
+                self.authority = None
+                if fmri.startswith("pkg://"):
+                        nameidx = fmri.index("/", 6) + 1
+                        self.authority = fmri[6:nameidx - 1]
+                elif fmri.startswith("pkg:/"):
+                        nameidx = 5
+                else:
+                        nameidx = 0
 
-                m = re.match("([^@]*)@([\dTZ\,\.\-\:]*)", fmri)
-                if m != None:
-                        # XXX Replace with server's default authority.
-                        self.authority = None
-                        self.pkg_name = m.group(1)
-                        self.version = Version(m.group(2), build_release)
-
-                        return
-
-                m = re.match("([^@]*)", fmri)
-                if m != None:
-                        # XXX Replace with server's default authority.
-                        self.authority = None
-                        self.pkg_name = m.group(1)
-                        self.version = None
-
-                        return
-
+                if veridx:
+                        self.pkg_name = fmri[nameidx:veridx]
+                else:
+                        self.pkg_name = fmri[nameidx:]
 
         def get_authority(self):
                 return self.authority
--- a/src/modules/manifest.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/manifest.py	Fri Oct 12 18:05:07 2007 -0700
@@ -24,7 +24,6 @@
 
 import bisect
 import os
-import re
 import sha
 import shutil
 import time
@@ -229,7 +228,8 @@
                 # action more than once in packages that can be installed
                 # together has to be solved somewhere else, though.)
                 for l in str.splitlines():
-                        if re.match("^\s*(#.*)?$", l):
+                        l = l.lstrip()
+                        if not l or l[0] == "#":
                                 continue
 
                         try:
--- a/src/modules/package.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/package.py	Fri Oct 12 18:05:07 2007 -0700
@@ -27,7 +27,6 @@
 
 import errno
 import os
-import re
 import sha
 import shutil
 import time
--- a/src/modules/server/face.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/server/face.py	Fri Oct 12 18:05:07 2007 -0700
@@ -26,8 +26,6 @@
 
 # XXX Use small templating module?
 
-import re
-
 # Non-HTML GET functions
 
 def css(img, request):
--- a/src/modules/version.py	Fri Oct 12 17:52:11 2007 -0700
+++ b/src/modules/version.py	Fri Oct 12 18:05:07 2007 -0700
@@ -27,8 +27,6 @@
 
 import datetime
 import exceptions
-import re
-import string
 import time
 
 CONSTRAINT_NONE = 0
@@ -53,13 +51,13 @@
         value as the first two numbers in the sequence."""
 
         def __init__(self, dotstring):
-                m = re.match("\d+(\.\d)*", dotstring)
-                if m == None:
-                        raise IllegalDotSequence
-                self.sequence = map(int, re.split("\.", dotstring))
+                try:
+                        self.sequence = map(int, dotstring.split("."))
+                except ValueError:
+                        raise IllegalDotSequence(dotstring)
 
         def __str__(self):
-                return string.join(map(str, self.sequence), ".")
+                return ".".join(map(str, self.sequence))
 
         def __ne__(self, other):
                 if self.sequence != other.sequence:
@@ -126,87 +124,52 @@
 
         def __init__(self, version_string, build_string):
                 # XXX If illegally formatted, raise exception.
-                m = re.match(
-                    "(\d+[\.\d]*),(\d+[\.\d]*)-(\d+[\.\d]*)\:(\d+T\d+Z)",
-                    version_string)
 
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.build_release = DotSequence(m.group(2))
-                        self.branch = DotSequence(m.group(3))
-                        self.datetime = datetime.datetime(
-                             *time.strptime(m.group(4), "%Y%m%dT%H%M%SZ")[0:6])
-
-                        return
-
-                m = re.match(
-                    "(\d+[\.\d]*),(\d+[\.\d]*)-(\d+[\.\d]*)\:(\d+)",
-                    version_string)
+                try:
+                        timeidx = version_string.index(":")
+                        timestr = version_string[timeidx + 1:]
+                except ValueError:
+                        timeidx = None
+                        timestr = None
 
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.build_release = DotSequence(m.group(2))
-                        self.branch = DotSequence(m.group(3))
-                        self.datetime = datetime.datetime.fromtimestamp(
-                                float(m.group(4)))
-
-                        return
+                try:
+                        branchidx = version_string.index("-")
+                        branch = version_string[branchidx + 1:timeidx]
+                except ValueError:
+                        branchidx = timeidx
+                        branch = None
 
-                m = re.match("(\d+[\.\d]*),(\d+[\.\d]*)-(\d+[\.\d]*)",
-                    version_string)
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.build_release = DotSequence(m.group(2))
-                        self.branch = DotSequence(m.group(3))
-                        self.datetime = datetime.datetime.fromtimestamp(0)
-                        return
+                try:
+                        buildidx = version_string.index(",")
+                        build = version_string[buildidx + 1:branchidx]
+                except ValueError:
+                        buildidx = branchidx
+                        build = None
 
-                m = re.match("(\d+[\.\d]*),(\d+[\.\d]*)",
-                    version_string)
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.build_release = DotSequence(m.group(2))
+                self.release = DotSequence(version_string[:buildidx])
+
+                if branch:
+                        self.branch = DotSequence(branch)
+                else:
                         self.branch = DotSequence("0")
-                        self.datetime = datetime.datetime.fromtimestamp(0)
-                        return
 
-                assert build_string != None
-                self.build_release = DotSequence(build_string)
-
-                m = re.match("(\d+[\.\d]*)-(\d+[\.\d]*)\:(\d+T\d+Z)",
-                    version_string)
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.branch = DotSequence(m.group(2))
-                        self.datetime = datetime.datetime(
-                            *time.strptime(m.group(3), "%Y%m%dT%H%M%SZ")[0:6])
-                        return
+                if build:
+                        self.build_release = DotSequence(build)
+                else:
+                        assert build_string is not None
+                        self.build_release = DotSequence(build_string)
 
-                m = re.match("(\d+[\.\d]*)-(\d+[\.\d]*)\:(\d+)", version_string)
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.branch = DotSequence(m.group(2))
-                        self.datetime = datetime.datetime.fromtimestamp(
-                            float(m.group(3)))
-                        return
+                if timestr:
+                        if timestr.endswith("Z") and "T" in timestr:
+                                self.datetime = datetime.datetime(
+                                    *time.strptime(timestr, "%Y%m%dT%H%M%SZ")[0:6])
+                        else:
+                                self.datetime = datetime.datetime.fromtimestamp(
+                                    float(timestr))
+                else:
+                        self.datetime = datetime.datetime.fromtimestamp(0)
 
-                # Sequence omitted?
-                m = re.match("(\d[\.\d]*)-(\d[\.\d]*)", version_string)
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.branch = DotSequence(m.group(2))
-                        self.datetime = datetime.datetime.fromtimestamp(0)
-                        return
-
-                # Branch omitted?
-                m = re.match("(\d[\.\d]*)", version_string)
-                if m != None:
-                        self.release = DotSequence(m.group(1))
-                        self.branch = DotSequence("0")
-                        self.datetime = datetime.datetime.fromtimestamp(0)
-                        return
-
-                raise IllegalVersion
+                # raise IllegalVersion
 
         def compatible_with_build(self, target):
                 """target is a DotSequence for the target system."""
@@ -352,6 +315,18 @@
         d3 = DotSequence("5.4")
         d4 = DotSequence("5.6")
 
+        assert str(v1) == "5.5.1,5.5.1-10:20051122T000000Z"
+        assert str(v2) == "5.5.1,5.5.1-10:20070318T123456Z"
+        assert str(v3) == "5.5.1,5.5-10:19691231T160000Z"
+        assert str(v4) == "5.5.1,5.4-6:19691231T160000Z"
+        assert str(v5) == "5.6,1-0:19691231T160000Z"
+        assert str(v6) == "5.7,5.4-0:19691231T160000Z"
+        assert str(v7) == "5.10,5.5.1-0:19691231T160000Z"
+        assert str(v8) == "5.10.1,5.5.1-0:19691231T160000Z"
+        assert str(v9) == "5.11,5.5.1-0:19691231T160000Z"
+        assert str(v10) == "0.1,5.11-1:19691231T160000Z"
+        assert str(v11) == "0.1,5.11-1:20070710T120000Z"
+
         assert v1 < v2
         assert v4 < v3
         assert v4 < v5