usr/src/tools/onbld/Scm/Backup.py
author Mark J. Nelson <Mark.J.Nelson@Sun.COM>
Wed, 06 Aug 2008 16:29:39 -0600
changeset 7298 b69e27387f74
parent 7078 935563142864
child 8602 c56c22350266
permissions -rw-r--r--
6733918 Teamware has retired, please welcome your new manager, Mercurial 4758439 some files use "current date" sccs keywords 6560843 asm sources should not rely on .file "%M%" for naming STT_FILE symbols 6560958 Solaris:: perl modules should not use SCCS keywords in version information 6729074 webrev doesn't deal well with remote ssh hg parents

#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2
#  as published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

#
# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

'''
Workspace backup

Backup format is:
   backupdir/
      wsname/
         generation#/
            dirstate (handled by CdmUncommittedBackup)
                File containing dirstate nodeid (the tip we expect to be at
                after applying the bundle).

            bundle (handled by CdmCommittedBackup)
                An Hg bundle containing outgoing committed changes.

            nodes (handled by CdmCommittedBackup)
                A text file listing the full (hex) nodeid of all nodes in
                bundle, used by need_backup.

            diff (handled by CdmUncommittedBackup)
                A Git-formatted diff containing uncommitted changes.

            renames (handled by CdmUncommittedBackup)
                A list of renames in the working copy that have to be
                applied manually, rather than by the diff.

            metadata.tar.gz (handled by CdmMetadataBackup)
                $CODEMGR_WS/.hg/hgrc
                $CODEMGR_WS/.hg/localtags
                $CODEMGR_WS/.hg/patches (Mq data)

         latest -> generation#
            Newest backup generation.

All files in a given backup generation, with the exception of
dirstate, are optional.
'''

import os, pwd, shutil, traceback, tarfile, time
from mercurial import changegroup, patch, node, util
from cStringIO import StringIO


class CdmCommittedBackup(object):
    '''Backup of committed changes'''

    def __init__(self, backup, ws):
        self.ws = ws
        self.bu = backup
        self.files = ('bundle', 'nodes')

    def _outgoing_nodes(self, parent):
        '''Return a list of all outgoing nodes in hex format'''

        if parent:
            outgoing = self.ws.findoutgoing(parent)
            nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
            return map(node.hex, nodes)
        else:
            return []

    def backup(self):
        '''Backup committed changes'''
        parent = self.ws.parent()

        if not parent:
            self.ws.ui.warn('Workspace has no parent, committed changes will '
                            'not be backed up\n')
            return

        out = self.ws.findoutgoing(parent)
        if not out:
            return

        cg = self.ws.repo.changegroup(out, 'bundle')
        changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')

        outnodes = self._outgoing_nodes(parent)
        if outnodes:
            fp = None
            try:
                try:
                    fp = open(self.bu.backupfile('nodes'), 'w')
                    fp.write('%s\n' % '\n'.join(outnodes))
                except EnvironmentError, e:
                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
            finally:
                if fp and not fp.closed:
                    fp.close()

    def restore(self):
        '''Restore committed changes from backup'''
        bfile = self.bu.backupfile('bundle')

        if os.path.exists(bfile):
            f = None
            try:
                try:
                    f = open(bfile, 'r')
                    bundle = changegroup.readbundle(f, bfile)
                    self.ws.repo.addchangegroup(bundle, 'strip',
                                                'bundle:%s' % bfile)
                except EnvironmentError, e:
                    raise util.Abort("couldn't restore committed changes: %s\n"
                                     "   %s" % (bfile, e))
            finally:
                if f and not f.closed:
                    f.close()

    def need_backup(self):
        '''Compare backup of committed changes to workspace'''

        if os.path.exists(self.bu.backupfile('nodes')):
            f = None
            try:
                try:
                    f = open(self.bu.backupfile('nodes'))
                    bnodes = set([line.rstrip('\r\n')
                                  for line in f.readlines()])
                    f.close()
                except EnvironmentError, e:
                    raise util.Abort("couldn't open backup node list: %s" % e)
            finally:
                if f and not f.closed:
                    f.close()
        else:
            bnodes = set()

        outnodes = set(self._outgoing_nodes(self.ws.parent()))
        if outnodes != bnodes:
            return True

        return False

    def cleanup(self):
        '''Remove backed up committed changes'''

        for fname in self.files:
            if os.path.exists(self.bu.backupfile(fname)):
                os.unlink(self.bu.backupfile(fname))


class CdmUncommittedBackup(object):
    '''Backup of uncommitted changes'''

    def __init__(self, backup, ws):
        self.ws = ws
        self.bu = backup

    def _clobbering_renames(self):
        '''Return a list of pairs of files representing renames/copies
        that clobber already versioned files.  [(oldname newname)...]'''

        #
        # Note that this doesn't handle uncommitted merges
        # as CdmUncommittedBackup itself doesn't.
        #
        wctx = self.ws.repo.workingctx()
        parent = wctx.parents()[0]

        ret = []
        for fname in wctx.added() + wctx.modified():
            rn = wctx.filectx(fname).renamed()
            if rn and fname in parent:
                ret.append((rn[0], fname))
        return ret

    def backup(self):
        '''Backup uncommitted changes'''

        if self.ws.merged():
            raise util.Abort("Unable to backup an uncommitted merge.\n"
                             "Please complete your merge and commit")

        dirstate = node.hex(self.ws.repo.changectx().node())

        fp = None
        try:
            try:
                fp = open(self.bu.backupfile('dirstate'), 'w')
                fp.write(dirstate + '\n')
            except EnvironmentError, e:
                raise util.Abort("couldn't save working copy parent: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

        try:
            try:
                fp = open(self.bu.backupfile('renames'), 'w')
                for cons in self._clobbering_renames():
                    fp.write("%s %s\n" % cons)
            except EnvironmentError, e:
                raise util.Abort("couldn't save clobbering copies: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

        try:
            try:
                fp = open(self.bu.backupfile('diff'), 'w')
                patch.diff(self.ws.repo, fp=fp,
                           opts=patch.diffopts(self.ws.ui, opts={'git': True}))
            except EnvironmentError, e:
                raise util.Abort("couldn't save working copy diff: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

    def _dirstate(self):
        '''Return the current working copy node'''
        fp = None
        try:
            try:
                fp = open(self.bu.backupfile('dirstate'))
                dirstate = fp.readline().strip()
                return dirstate
            except EnvironmentError, e:
                raise util.Abort("couldn't read saved parent: %s" % e)
        finally:
            if fp and not fp.closed:
                fp.close()

    def restore(self):
        '''Restore uncommitted changes'''
        diff = self.bu.backupfile('diff')
        dirstate = self._dirstate()

        try:
            self.ws.clean(rev=dirstate)
        except util.Abort, e:
            raise util.Abort("couldn't update to saved node: %s" % e)

        if not os.path.exists(diff):
            return

        #
        # There's a race here whereby if the patch (or part thereof)
        # is applied within the same second as the clean above (such
        # that mtime doesn't change) and if the size of that file
        # does not change, Hg may not see the change.
        #
        # We sleep a full second to avoid this, as sleeping merely
        # until the next second begins would require very close clock
        # synchronization on network filesystems.
        #
        time.sleep(1)

        files = {}
        try:
            try:
                fuzz = patch.patch(diff, self.ws.ui, strip=1,
                                   cwd=self.ws.repo.root, files=files)
                if fuzz:
                    raise util.Abort('working copy diff applied with fuzz')
            except Exception, e:
                raise util.Abort("couldn't apply working copy diff: %s\n"
                                 "   %s" % (diff, e))
        finally:
            patch.updatedir(self.ws.ui, self.ws.repo, files)

        if not os.path.exists(self.bu.backupfile('renames')):
            return

        #
        # We need to re-apply name changes where the new name
        # (rename/copy destination) is an already versioned file, as
        # Hg would otherwise ignore them.
        #
        try:
            fp = open(self.bu.backupfile('renames'))
            for line in fp:
                source, dest = line.strip().split()
                self.ws.repo.copy(source, dest)
        except EnvironmentError, e:
            raise util.Abort('unable to open renames file: %s' % e)
        except ValueError:
            raise util.Abort('corrupt renames file: %s' %
                             self.bu.backupfile('renames'))

    def need_backup(self):
        '''Compare backup of uncommitted changes to workspace'''
        if self._dirstate() != node.hex(self.ws.repo.changectx().node()):
            return True

        curdiff = StringIO()
        diff = self.bu.backupfile('diff')
        fd = None

        patch.diff(self.ws.repo, fp=curdiff,
                   opts=patch.diffopts(self.ws.ui, opts={'git': True}))

        if os.path.exists(diff):
            try:
                try:
                    fd = open(diff)
                    backdiff = fd.read()
                except EnvironmentError, e:
                    raise util.Abort("couldn't open backup diff %s\n"
                                     "   %s" % (diff, e))
            finally:
                if fd and not fd.closed:
                    fd.close()
        else:
            backdiff = ''

        if backdiff != curdiff.getvalue():
            return True


        currrenamed = self._clobbering_renames()
        bakrenamed = None

        if os.path.exists(self.bu.backupfile('renames')):
            try:
                try:
                    fd = open(self.bu.backupfile('renames'))
                    bakrenamed = [line.strip().split(' ') for line in fd]
                except EnvironmentError, e:
                    raise util.Abort("couldn't open renames file %s: %s\n" %
                                     (self.bu.backupfile('renames'), e))
            finally:
                if fd and not fd.closed:
                    fd.close()

            if currrenamed != bakrenamed:
                return True

        return False

    def cleanup(self):
        '''Remove backed up uncommitted changes'''
        for fname in ('dirstate', 'diff', 'renames'):
            if os.path.exists(self.bu.backupfile(fname)):
                os.unlink(self.bu.backupfile(fname))


class CdmMetadataBackup(object):
    '''Backup of workspace metadata'''

    def __init__(self, backup, ws):
        self.bu = backup
        self.ws = ws
        self.files = ('hgrc', 'localtags', 'patches', 'cdm')

    def backup(self):
        '''Backup workspace metadata'''

        tar = None

        try:
            try:
                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
                                   'w:gz')
                tar.errorlevel = 2
            except (EnvironmentError, tarfile.TarError), e:
                raise util.Abort("couldn't open %s for writing: %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

            try:
                for elt in self.files:
                    fpath = self.ws.repo.join(elt)
                    if os.path.exists(fpath):
                        tar.add(fpath, elt)
            except (EnvironmentError, tarfile.TarError), e:
                #
                # tarfile.TarError doesn't include the tar member or file
                # in question, so we have to do so ourselves.
                #
                if isinstance(e, tarfile.TarError):
                    error = "%s: %s" % (elt, e)
                else:
                    error = str(e)

                raise util.Abort("couldn't backup metadata to %s:\n"
                                 "  %s" %
                                 (self.bu.backupfile('metadata.tar.gz'),
                                  error))
        finally:
            if tar and not tar.closed:
                tar.close()

    def old_restore(self):
        '''Restore workspace metadata from an pre-tar backup'''

        for fname in self.files:
            bfile = self.bu.backupfile(fname)
            wfile = self.ws.repo.join(fname)

            if os.path.exists(bfile):
                try:
                    shutil.copy2(bfile, wfile)
                except EnvironmentError, e:
                    raise util.Abort("couldn't restore metadata from %s:\n"
                                     "   %s" % (bfile, e))

    def tar_restore(self):
        '''Restore workspace metadata (from a tar-style backup)'''

        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            tar = None

            try:
                try:
                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
                    tar.errorlevel = 2
                except (EnvironmentError, tarfile.TarError), e:
                    raise util.Abort("couldn't open %s: %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

                try:
                    for elt in tar:
                        tar.extract(elt, path=self.ws.repo.path)
                except (EnvironmentError, tarfile.TarError), e:
                    # Make sure the member name is in the exception message.
                    if isinstance(e, tarfile.TarError):
                        error = "%s: %s" % (elt.name, e)
                    else:
                        error = str(e)

                    raise util.Abort("couldn't restore metadata from %s:\n"
                                     "   %s" %
                                     (self.bu.backupfile('metadata.tar.gz'),
                                      error))
            finally:
                if tar and not tar.closed:
                    tar.close()

    def restore(self):
        '''Restore workspace metadata'''

        if os.path.exists(self.bu.backupfile('hgrc')):
            self.old_restore()
        else:
            self.tar_restore()

    def need_backup(self):
        '''Compare backed up workspace metadata to workspace'''

        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            try:
                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
                tar.errorlevel = 2
            except (EnvironmentError, tarfile.TarError), e:
                raise util.Abort("couldn't open metadata tarball: %s\n"
                                 "   %s" %
                                 (self.bu.backupfile('metadata.tar.gz'), e))

            for elt in tar:
                fpath = self.ws.repo.join(elt.name)
                if not os.path.exists(fpath):
                    return True     # File in tar, not workspace

                if elt.isdir():     # Don't care about directories
                    continue

                if (elt.mtime != os.path.getmtime(fpath) or
                    elt.size != os.path.getsize(fpath)):
                    return True

            tarnames = tar.getnames()
            tar.close()
        else:
            tarnames = []

        for mfile in self.files:
            fpath = self.ws.repo.join(mfile)

            if os.path.isdir(fpath):
                # Directories in tarfile always end with a '/'
                if not mfile.endswith('/'):
                    mfile += '/'

                if mfile not in tarnames:
                    return True

                for root, dirs, files in os.walk(fpath, topdown=True):
                    for elt in files:
                        path = os.path.join(root, elt)

                        rpath = self.ws.repo.path
                        if not rpath.endswith('/'):
                            rpath += '/'

                        path = path.replace(rpath, '', 1)
                        if path not in tarnames:
                            return True # In workspace not tar
            else:
                if os.path.exists(fpath) and mfile not in tarnames:
                    return True

        return False

    def cleanup(self):
        '''Remove backed up workspace metadata'''
        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
            os.unlink(self.bu.backupfile('metadata.tar.gz'))


class CdmBackup(object):
    '''A backup of a given workspace'''

    def __init__(self, ui, ws, name):
        self.ws = ws
        self.ui = ui
        self.backupdir = self._find_backup_dir(name)

        #
        # The order of instances here controls the order the various operations
        # are run.
        #
        # There's some inherent dependence, in that on restore we need
        # to restore committed changes prior to uncommitted changes
        # (as the parent revision of any uncommitted changes is quite
        # likely to not exist until committed changes are restored).
        # Metadata restore can happen at any point, but happens last
        # as a matter of convention.
        #
        self.modules = [x(self, ws) for x in [CdmCommittedBackup,
                                              CdmUncommittedBackup,
                                              CdmMetadataBackup]]


        if os.path.exists(os.path.join(self.backupdir, 'latest')):
            generation = os.readlink(os.path.join(self.backupdir, 'latest'))
            self.generation = int(os.path.split(generation)[1])
        else:
            self.generation = 0

    def _find_backup_dir(self, name):
        '''Find the path to an appropriate backup directory based on NAME'''
        backupdir = None
        backupbase = None

        if os.path.isabs(name):
            return name

        if self.ui.config('cdm', 'backupdir'):
            backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
        else:
            home = None

            try:
                home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                pass                    # Handled anyway

            if not home:
                raise util.Abort('Could not determine your HOME directory to '
                                 'find backup path')

            backupbase = os.path.join(home, 'cdm.backup')

        backupdir = os.path.join(backupbase, name)

        # If backupdir exists, it must be a directory.
        if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
            raise util.Abort('%s exists but is not a directory' % backupdir)

        return backupdir

    def backupfile(self, path):
        '''return full path to backup file FILE at GEN'''
        return os.path.join(self.backupdir, str(self.generation), path)

    def update_latest(self, gen):
        '''Update latest symlink to point to the current generation'''
        linkpath = os.path.join(self.backupdir, 'latest')

        if os.path.lexists(linkpath):
            os.unlink(linkpath)

        os.symlink(str(gen), linkpath)

    def create_gen(self, gen):
        '''Create a new backup generation'''
        try:
            os.makedirs(os.path.join(self.backupdir, str(gen)))
            self.update_latest(gen)
        except EnvironmentError, e:
            raise util.Abort("Couldn't create backup generation %s: %s" %
                             (os.path.join(self.backupdir, str(gen)), e))

    def need_backup(self):
        '''Compare backed up changes to workspace'''
        #
        # If there's no current backup generation, or the last backup was
        # invalid (lacking the dirstate file), we need a backup regardless
        # of anything else.
        #
        if (not self.generation or
            not os.path.exists(self.backupfile('dirstate'))):
            return True

        for x in self.modules:
            if x.need_backup():
                return True

        return False

    def backup(self):
        '''Take a backup of the current workspace'''

        if not os.path.exists(self.backupdir):
            try:
                os.makedirs(self.backupdir)
            except EnvironmentError, e:
                raise util.Abort('Could not create backup directory %s: %s' %
                                 (self.backupdir, e))

        self.generation += 1
        self.create_gen(self.generation)

        #
        # Lock the repo, so the backup can be consistent.  We need the
        # wlock too to make sure the dirstate parent doesn't change
        # underneath us.
        #

        lock = self.ws.repo.lock()
        wlock = self.ws.repo.lock()

        try:
            for x in self.modules:
                x.backup()
        except Exception, e:
            if isinstance(e, KeyboardInterrupt):
                self.ws.ui.warn("Interrupted\n")
            else:
                self.ws.ui.warn("Error: %s\n" % e)

                #
                # If it's not a 'normal' error, we want to print a stack
                # trace now in case the attempt to remove the partial
                # backup also fails, and raises a second exception.
                #
                if (not isinstance(e, (EnvironmentError, util.Abort))
                    or self.ws.ui.traceback):
                    traceback.print_exc()

            for x in self.modules:
                x.cleanup()

            os.rmdir(os.path.join(self.backupdir, str(self.generation)))
            self.generation -= 1

            if self.generation != 0:
                self.update_latest(self.generation)
            else:
                os.unlink(os.path.join(self.backupdir, 'latest'))

            raise util.Abort('Backup failed')

    def restore(self, gen=None):
        '''Restore workspace from backup

        Restores from backup generation GEN (defaulting to the latest)
        into workspace WS.'''

        wlock = self.ws.repo.wlock()
        lock = self.ws.repo.lock()

        if not os.path.exists(self.backupdir):
            raise util.Abort('Backup directory does not exist: %s' %
                             (self.backupdir))

        if gen:
            if not os.path.exists(os.path.join(self.backupdir, str(gen))):
                raise util.Abort('Backup generation does not exist: %s' %
                                 (os.path.join(self.backupdir, str(gen))))
            self.generation = int(gen)

        if not self.generation: # This is ok, 0 is not a valid generation
            raise util.Abort('Backup has no generations: %s' % self.backupdir)

        if not os.path.exists(self.backupfile('dirstate')):
            raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
                             (self.backupdir, self.generation))

        try:
            for x in self.modules:
                x.restore()
        except util.Abort, e:
            raise util.Abort('Error restoring workspace:\n'
                             '%s\n'
                             'Workspace will be partially restored' % e)