# Backup.py revision 9a70fc3be3b1e966bf78825cdb8d509963a6f0a1
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
'''
Workspace backup
Backup format is:
backupdir/
wsname/
generation#/
dirstate (handled by CdmUncommittedBackup)
File containing dirstate nodeid (the tip we expect to be at
after applying the bundle).
bundle (handled by CdmCommittedBackup)
An Hg bundle containing outgoing committed changes.
nodes (handled by CdmCommittedBackup)
A text file listing the full (hex) nodeid of all nodes in
bundle, used by need_backup.
diff (handled by CdmUncommittedBackup)
A Git-formatted diff containing uncommitted changes.
renames (handled by CdmUncommittedBackup)
A list of renames in the working copy that have to be
applied manually, rather than by the diff.
metadata.tar.gz (handled by CdmMetadataBackup)
$CODEMGR_WS/.hg/hgrc
$CODEMGR_WS/.hg/localtags
$CODEMGR_WS/.hg/patches (Mq data)
latest -> generation#
Newest backup generation.
All files in a given backup generation, with the exception of
dirstate, are optional.
'''
import os, pwd, shutil, traceback, tarfile, time
from mercurial import changegroup, patch, node, util
from cStringIO import StringIO
class CdmCommittedBackup(object):
'''Backup of committed changes'''
def __init__(self, backup, ws):
self.ws = ws
self.bu = backup
self.files = ('bundle', 'nodes')
def _outgoing_nodes(self, parent):
'''Return a list of all outgoing nodes in hex format'''
if parent:
outgoing = self.ws.findoutgoing(parent)
nodes = self.ws.repo.changelog.nodesbetween(outgoing)[0]
return map(node.hex, nodes)
else:
return []
def backup(self):
'''Backup committed changes'''
parent = self.ws.parent()
if not parent:
self.ws.ui.warn('Workspace has no parent, committed changes will '
'not be backed up\n')
return
out = self.ws.findoutgoing(parent)
if not out:
return
cg = self.ws.repo.changegroup(out, 'bundle')
changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')
outnodes = self._outgoing_nodes(parent)
if outnodes:
fp = None
try:
try:
fp = open(self.bu.backupfile('nodes'), 'w')
fp.write('%s\n' % '\n'.join(outnodes))
except EnvironmentError, e:
raise util.Abort("couldn't store outgoing nodes: %s" % e)
finally:
if fp and not fp.closed:
fp.close()
def restore(self):
'''Restore committed changes from backup'''
bfile = self.bu.backupfile('bundle')
if os.path.exists(bfile):
f = None
try:
try:
f = open(bfile, 'r')
bundle = changegroup.readbundle(f, bfile)
self.ws.repo.addchangegroup(bundle, 'strip',
'bundle:%s' % bfile)
except EnvironmentError, e:
raise util.Abort("couldn't restore committed changes: %s\n"
" %s" % (bfile, e))
finally:
if f and not f.closed:
f.close()
def need_backup(self):
'''Compare backup of committed changes to workspace'''
if os.path.exists(self.bu.backupfile('nodes')):
f = None
try:
try:
f = open(self.bu.backupfile('nodes'))
bnodes = set([line.rstrip('\r\n')
for line in f.readlines()])
f.close()
except EnvironmentError, e:
raise util.Abort("couldn't open backup node list: %s" % e)
finally:
if f and not f.closed:
f.close()
else:
bnodes = set()
outnodes = set(self._outgoing_nodes(self.ws.parent()))
if outnodes != bnodes:
return True
return False
def cleanup(self):
'''Remove backed up committed changes'''
for fname in self.files:
if os.path.exists(self.bu.backupfile(fname)):
os.unlink(self.bu.backupfile(fname))
class CdmUncommittedBackup(object):
'''Backup of uncommitted changes'''
def __init__(self, backup, ws):
self.ws = ws
self.bu = backup
def _clobbering_renames(self):
'''Return a list of pairs of files representing renames/copies
that clobber already versioned files. [(oldname newname)...]'''
#
# Note that this doesn't handle uncommitted merges
# as CdmUncommittedBackup itself doesn't.
#
wctx = self.ws.repo.workingctx()
parent = wctx.parents()[0]
ret = []
for fname in wctx.added() + wctx.modified():
rn = wctx.filectx(fname).renamed()
if rn and fname in parent:
ret.append((rn[0], fname))
return ret
def backup(self):
'''Backup uncommitted changes'''
if self.ws.merged():
raise util.Abort("Unable to backup an uncommitted merge.\n"
"Please complete your merge and commit")
dirstate = node.hex(self.ws.repo.changectx().node())
fp = None
try:
try:
fp = open(self.bu.backupfile('dirstate'), 'w')
fp.write(dirstate + '\n')
except EnvironmentError, e:
raise util.Abort("couldn't save working copy parent: %s" % e)
finally:
if fp and not fp.closed:
fp.close()
try:
try:
fp = open(self.bu.backupfile('renames'), 'w')
for cons in self._clobbering_renames():
fp.write("%s %s\n" % cons)
except EnvironmentError, e:
raise util.Abort("couldn't save clobbering copies: %s" % e)
finally:
if fp and not fp.closed:
fp.close()
try:
try:
fp = open(self.bu.backupfile('diff'), 'w')
patch.diff(self.ws.repo, fp=fp,
opts=patch.diffopts(self.ws.ui, opts={'git': True}))
except EnvironmentError, e:
raise util.Abort("couldn't save working copy diff: %s" % e)
finally:
if fp and not fp.closed:
fp.close()
def _dirstate(self):
'''Return the current working copy node'''
fp = None
try:
try:
fp = open(self.bu.backupfile('dirstate'))
dirstate = fp.readline().strip()
return dirstate
except EnvironmentError, e:
raise util.Abort("couldn't read saved parent: %s" % e)
finally:
if fp and not fp.closed:
fp.close()
def restore(self):
'''Restore uncommitted changes'''
diff = self.bu.backupfile('diff')
dirstate = self._dirstate()
try:
self.ws.clean(rev=dirstate)
except util.Abort, e:
raise util.Abort("couldn't update to saved node: %s" % e)
if not os.path.exists(diff):
return
#
# There's a race here whereby if the patch (or part thereof)
# is applied within the same second as the clean above (such
# that mtime doesn't change) and if the size of that file
# does not change, Hg may not see the change.
#
# We sleep a full second to avoid this, as sleeping merely
# until the next second begins would require very close clock
# synchronization on network filesystems.
#
time.sleep(1)
files = {}
try:
try:
fuzz = patch.patch(diff, self.ws.ui, strip=1,
cwd=self.ws.repo.root, files=files)
if fuzz:
raise util.Abort('working copy diff applied with fuzz')
except Exception, e:
raise util.Abort("couldn't apply working copy diff: %s\n"
" %s" % (diff, e))
finally:
patch.updatedir(self.ws.ui, self.ws.repo, files)
if not os.path.exists(self.bu.backupfile('renames')):
return
#
# We need to re-apply name changes where the new name
# (rename/copy destination) is an already versioned file, as
# Hg would otherwise ignore them.
#
try:
fp = open(self.bu.backupfile('renames'))
for line in fp:
source, dest = line.strip().split()
self.ws.repo.copy(source, dest)
except EnvironmentError, e:
raise util.Abort('unable to open renames file: %s' % e)
except ValueError:
raise util.Abort('corrupt renames file: %s' %
self.bu.backupfile('renames'))
def need_backup(self):
'''Compare backup of uncommitted changes to workspace'''
if self._dirstate() != node.hex(self.ws.repo.changectx().node()):
return True
curdiff = StringIO()
diff = self.bu.backupfile('diff')
fd = None
patch.diff(self.ws.repo, fp=curdiff,
opts=patch.diffopts(self.ws.ui, opts={'git': True}))
if os.path.exists(diff):
try:
try:
fd = open(diff)
backdiff = fd.read()
except EnvironmentError, e:
raise util.Abort("couldn't open backup diff %s\n"
" %s" % (diff, e))
finally:
if fd and not fd.closed:
fd.close()
else:
backdiff = ''
if backdiff != curdiff.getvalue():
return True
currrenamed = self._clobbering_renames()
bakrenamed = None
if os.path.exists(self.bu.backupfile('renames')):
try:
try:
fd = open(self.bu.backupfile('renames'))
bakrenamed = [line.strip().split(' ') for line in fd]
except EnvironmentError, e:
raise util.Abort("couldn't open renames file %s: %s\n" %
(self.bu.backupfile('renames'), e))
finally:
if fd and not fd.closed:
fd.close()
if currrenamed != bakrenamed:
return True
return False
def cleanup(self):
'''Remove backed up uncommitted changes'''
for fname in ('dirstate', 'diff', 'renames'):
if os.path.exists(self.bu.backupfile(fname)):
os.unlink(self.bu.backupfile(fname))
class CdmMetadataBackup(object):
'''Backup of workspace metadata'''
def __init__(self, backup, ws):
self.bu = backup
self.ws = ws
self.files = ('hgrc', 'localtags', 'patches', 'cdm')
def backup(self):
'''Backup workspace metadata'''
tar = None
try:
try:
tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
'w:gz')
tar.errorlevel = 2
except (EnvironmentError, tarfile.TarError), e:
raise util.Abort("couldn't open %s for writing: %s" %
(self.bu.backupfile('metadata.tar.gz'), e))
try:
for elt in self.files:
fpath = self.ws.repo.join(elt)
if os.path.exists(fpath):
tar.add(fpath, elt)
except (EnvironmentError, tarfile.TarError), e:
#
# tarfile.TarError doesn't include the tar member or file
# in question, so we have to do so ourselves.
#
if isinstance(e, tarfile.TarError):
error = "%s: %s" % (elt, e)
else:
error = str(e)
raise util.Abort("couldn't backup metadata to %s:\n"
" %s" %
(self.bu.backupfile('metadata.tar.gz'),
error))
finally:
if tar and not tar.closed:
tar.close()
def old_restore(self):
'''Restore workspace metadata from an pre-tar backup'''
for fname in self.files:
bfile = self.bu.backupfile(fname)
wfile = self.ws.repo.join(fname)
if os.path.exists(bfile):
try:
shutil.copy2(bfile, wfile)
except EnvironmentError, e:
raise util.Abort("couldn't restore metadata from %s:\n"
" %s" % (bfile, e))
def tar_restore(self):
'''Restore workspace metadata (from a tar-style backup)'''
if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
tar = None
try:
try:
tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
tar.errorlevel = 2
except (EnvironmentError, tarfile.TarError), e:
raise util.Abort("couldn't open %s: %s" %
(self.bu.backupfile('metadata.tar.gz'), e))
try:
for elt in tar:
tar.extract(elt, path=self.ws.repo.path)
except (EnvironmentError, tarfile.TarError), e:
# Make sure the member name is in the exception message.
if isinstance(e, tarfile.TarError):
error = "%s: %s" % (elt.name, e)
else:
error = str(e)
raise util.Abort("couldn't restore metadata from %s:\n"
" %s" %
(self.bu.backupfile('metadata.tar.gz'),
error))
finally:
if tar and not tar.closed:
tar.close()
def restore(self):
'''Restore workspace metadata'''
if os.path.exists(self.bu.backupfile('hgrc')):
self.old_restore()
else:
self.tar_restore()
def need_backup(self):
'''Compare backed up workspace metadata to workspace'''
if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
try:
tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
tar.errorlevel = 2
except (EnvironmentError, tarfile.TarError), e:
raise util.Abort("couldn't open metadata tarball: %s\n"
" %s" %
(self.bu.backupfile('metadata.tar.gz'), e))
for elt in tar:
fpath = self.ws.repo.join(elt.name)
if not os.path.exists(fpath):
return True # File in tar, not workspace
if elt.isdir(): # Don't care about directories
continue
if (elt.mtime != os.path.getmtime(fpath) or
elt.size != os.path.getsize(fpath)):
return True
tarnames = tar.getnames()
tar.close()
else:
tarnames = []
for mfile in self.files:
fpath = self.ws.repo.join(mfile)
if os.path.isdir(fpath):
# Directories in tarfile always end with a '/'
if not mfile.endswith('/'):
mfile += '/'
if mfile not in tarnames:
return True
for root, dirs, files in os.walk(fpath, topdown=True):
for elt in files:
path = os.path.join(root, elt)
rpath = self.ws.repo.path
if not rpath.endswith('/'):
rpath += '/'
path = path.replace(rpath, '', 1)
if path not in tarnames:
return True # In workspace not tar
else:
if os.path.exists(fpath) and mfile not in tarnames:
return True
return False
def cleanup(self):
'''Remove backed up workspace metadata'''
if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
os.unlink(self.bu.backupfile('metadata.tar.gz'))
class CdmBackup(object):
'''A backup of a given workspace'''
def __init__(self, ui, ws, name):
self.ws = ws
self.ui = ui
self.backupdir = self._find_backup_dir(name)
#
# The order of instances here controls the order the various operations
# are run.
#
# There's some inherent dependence, in that on restore we need
# to restore committed changes prior to uncommitted changes
# (as the parent revision of any uncommitted changes is quite
# likely to not exist until committed changes are restored).
# Metadata restore can happen at any point, but happens last
# as a matter of convention.
#
self.modules = [x(self, ws) for x in [CdmCommittedBackup,
CdmUncommittedBackup,
CdmMetadataBackup]]
if os.path.exists(os.path.join(self.backupdir, 'latest')):
generation = os.readlink(os.path.join(self.backupdir, 'latest'))
self.generation = int(os.path.split(generation)[1])
else:
self.generation = 0
def _find_backup_dir(self, name):
'''Find the path to an appropriate backup directory based on NAME'''
backupdir = None
backupbase = None
if os.path.isabs(name):
return name
if self.ui.config('cdm', 'backupdir'):
backupbase = os.path.expanduser(self.ui.config('cdm', 'backupdir'))
else:
home = None
try:
home = os.getenv('HOME') or pwd.getpwuid(os.getuid()).pw_dir
except KeyError:
pass # Handled anyway
if not home:
raise util.Abort('Could not determine your HOME directory to '
'find backup path')
backupbase = os.path.join(home, 'cdm.backup')
backupdir = os.path.join(backupbase, name)
# If backupdir exists, it must be a directory.
if (os.path.exists(backupdir) and not os.path.isdir(backupdir)):
raise util.Abort('%s exists but is not a directory' % backupdir)
return backupdir
def backupfile(self, path):
'''return full path to backup file FILE at GEN'''
return os.path.join(self.backupdir, str(self.generation), path)
def update_latest(self, gen):
'''Update latest symlink to point to the current generation'''
linkpath = os.path.join(self.backupdir, 'latest')
if os.path.lexists(linkpath):
os.unlink(linkpath)
os.symlink(str(gen), linkpath)
def create_gen(self, gen):
'''Create a new backup generation'''
try:
os.makedirs(os.path.join(self.backupdir, str(gen)))
self.update_latest(gen)
except EnvironmentError, e:
raise util.Abort("Couldn't create backup generation %s: %s" %
(os.path.join(self.backupdir, str(gen)), e))
def need_backup(self):
'''Compare backed up changes to workspace'''
#
# If there's no current backup generation, or the last backup was
# invalid (lacking the dirstate file), we need a backup regardless
# of anything else.
#
if (not self.generation or
not os.path.exists(self.backupfile('dirstate'))):
return True
for x in self.modules:
if x.need_backup():
return True
return False
def backup(self):
'''Take a backup of the current workspace'''
if not os.path.exists(self.backupdir):
try:
os.makedirs(self.backupdir)
except EnvironmentError, e:
raise util.Abort('Could not create backup directory %s: %s' %
(self.backupdir, e))
self.generation += 1
self.create_gen(self.generation)
#
# Lock the repo, so the backup can be consistent. We need the
# wlock too to make sure the dirstate parent doesn't change
# underneath us.
#
lock = self.ws.repo.lock()
wlock = self.ws.repo.lock()
try:
for x in self.modules:
x.backup()
except Exception, e:
if isinstance(e, KeyboardInterrupt):
self.ws.ui.warn("Interrupted\n")
else:
self.ws.ui.warn("Error: %s\n" % e)
#
# If it's not a 'normal' error, we want to print a stack
# trace now in case the attempt to remove the partial
# backup also fails, and raises a second exception.
#
if (not isinstance(e, (EnvironmentError, util.Abort))
or self.ws.ui.traceback):
traceback.print_exc()
for x in self.modules:
x.cleanup()
os.rmdir(os.path.join(self.backupdir, str(self.generation)))
self.generation -= 1
if self.generation != 0:
self.update_latest(self.generation)
else:
os.unlink(os.path.join(self.backupdir, 'latest'))
raise util.Abort('Backup failed')
def restore(self, gen=None):
'''Restore workspace from backup
Restores from backup generation GEN (defaulting to the latest)
into workspace WS.'''
wlock = self.ws.repo.wlock()
lock = self.ws.repo.lock()
if not os.path.exists(self.backupdir):
raise util.Abort('Backup directory does not exist: %s' %
(self.backupdir))
if gen:
if not os.path.exists(os.path.join(self.backupdir, str(gen))):
raise util.Abort('Backup generation does not exist: %s' %
(os.path.join(self.backupdir, str(gen))))
self.generation = int(gen)
if not self.generation: # This is ok, 0 is not a valid generation
raise util.Abort('Backup has no generations: %s' % self.backupdir)
if not os.path.exists(self.backupfile('dirstate')):
raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
(self.backupdir, self.generation))
try:
for x in self.modules:
x.restore()
except util.Abort, e:
raise util.Abort('Error restoring workspace:\n'
'%s\n'
'Workspace will be partially restored' % e)