backup revision 706a446ea5c5c1644b53a82fcadd8c9080ba3b4f
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence#!/local/usr/ruby/shims/ruby
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# The first line is for deployment machines only. For local machines, use:
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence#!/usr/bin/env ruby
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# You can find more extensive documentation of this script at
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# https://github.com/ontohub/ontohub/blob/staging/doc/backup_and_restore_of_ontohub_data.md
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# Description
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# This backup script creates and restores backups of ontohub data. It includes:
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# * bare git repositories (data/repositories)
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# * named symlinks to git repositories (data/git_daemon and data/git_ssh)
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# * the postgres database
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence#
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# Usage
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# First note: Run this as the root user, e.g. with sudo.
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# To create a backup, run this script with the argument `create`:
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# # script/backup create
9cd39d7306a472544733aff760d2916888d2b1f4David Lawrence# Then a backup named with the current date and time is created in the
9cd39d7306a472544733aff760d2916888d2b1f4David Lawrence# backup directory (see below).
9cd39d7306a472544733aff760d2916888d2b1f4David Lawrence#
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# To restore a backup, run this script with the argument `restore <backup name>`
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# # script/backup restore 2015-01-01_00-00
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# Then the selected backup is fully restored
9cd39d7306a472544733aff760d2916888d2b1f4David Lawrence#
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# Backup directory
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# For production machines, the backup directory is:
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# /data/ontohub_data_backup
5273184ae1ae4fbb30c54d59c6c40ab2c68312afMark Andrews#
5273184ae1ae4fbb30c54d59c6c40ab2c68312afMark Andrews# Super user privileges
5273184ae1ae4fbb30c54d59c6c40ab2c68312afMark Andrews# To create and restore, we need root privileges. Otherwise file modes are not
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# preserved. This script will call `sudo` when needed and inform you about the
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# reason for calling `sudo`. If you don't allow sudo, a backup will be created
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# or restored anyway, but the file modes and ownership are not preserved.
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# Then, you need to adjust them manually.
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence#
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# Maintenance mode
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# While backing up and restoring the data, the maintenance mode is activated.
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence# This way we guarantee data consistency of the backup.
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence
require 'fileutils'
require 'open3'
require 'pathname'
require 'shellwords'
require 'tmpdir.rb'
8b7304a34c751e519ede7d00b77f1f962c0a37e4David Lawrence
module Backup
  # Raised when an external command (pg_dump, pg_restore, tar, mv, rm) could
  # not be started or exited with a non-zero status.
  class CommandError < StandardError; end

  # Creates, restores and prunes backups consisting of a postgres dump and a
  # tar archive of the data directories listed in DATA_DIRS.
  class Backup
    # Number of most recent backups that are never pruned.
    BACKUPS_COUNT = 30
    # Backups are kept for at least 365 days (value in seconds).
    BACKUPS_VALIDITY_TIME = 365 * 60 * 60 * 24

    # Marker file inside the data root; it exists while a backup/restore runs.
    MAINTENANCE_FILE = 'maintenance.txt'

    SQL_DUMP_FILE = 'ontohub_sql_dump.postgresql'
    REPOSITORY_FILE = 'ontohub_repositories.tar.gz'

    # Sub-directories of the data root that are archived and restored.
    DATA_DIRS = %w(repositories git_daemon git_ssh).freeze

    # Use 'sudo' on most systems
    SUDO_BINARY = '+'

    attr_reader :db_name, :data_root, :backup_root, :backup_instance_dir
    attr_reader :dry_run, :verbose, :sql_dump_as_db_user

    # db_name             - name of the postgres database to dump/restore.
    # data_root           - directory that contains the DATA_DIRS.
    # backup_root         - directory in which the backups are stored.
    # verbose             - print every command before it is executed.
    # dry_run             - do not execute external commands and do not touch
    #                       the maintenance file.
    # sql_dump_as_db_user - database user passed to pg_dump/pg_restore via -U.
    # user, group         - owner of the backup directory; `user` is also the
    #                       system user that runs pg_dump/pg_restore/tar.
    def initialize(db_name, data_root, backup_root,
                   verbose: false, dry_run: true, sql_dump_as_db_user: nil,
                   user: nil, group: nil)
      @db_name = db_name
      @backup_root = Pathname.new(backup_root)
      @data_root = Pathname.new(data_root)
      @data_root_basename = @data_root.basename.to_s
      # Paths are relative to the parent of the data root because the archive
      # is created and extracted from there.
      @data_dirs = DATA_DIRS.map { |dir| File.join(@data_root_basename, dir) }
      @user = user
      @group = group

      @dry_run = dry_run
      @verbose = verbose
      @sql_dump_as_db_user = sql_dump_as_db_user
    end

    # Creates a new backup named after the current time and prunes old ones.
    # Raises CommandError if pg_dump or tar fails. Maintenance mode is
    # disabled again even when a step fails, because creating a backup does
    # not modify the live data.
    def create
      puts 'Creating backup...'
      enable_maintenance_mode
      begin
        initialize_backup
        create_sql_dump
        create_repository_archive
        # We needed to create the directory for the script to continue later on.
        Dir.rmdir(backup_instance_dir) if dry_run
      ensure
        disable_maintenance_mode
      end
      puts "Created backup in #{backup_instance_dir}"
      self.class.prune(backup_root)
    end

    # Restores the backup called `backup_name` from the backup root.
    # Exits with status 1 if that backup does not exist. Raises CommandError
    # if a restore command fails; maintenance mode then stays enabled because
    # the data may be in an inconsistent state.
    def restore(backup_name)
      # Validate first: a wrong name must not leave maintenance mode enabled.
      initialize_restore(backup_name)
      enable_maintenance_mode
      restore_sql_dump
      restore_repository_archive
      disable_maintenance_mode
      puts "Restored backup from #{backup_instance_dir}"
    end

    # Removes backups from `backup_root` (String or Pathname) that are older
    # than BACKUPS_VALIDITY_TIME, always keeping the newest BACKUPS_COUNT.
    def self.prune(backup_root)
      root = Pathname.new(backup_root.to_s)
      unless root.directory?
        $stderr.puts "Nothing to prune: There is no backup directory."
        return
      end

      now = Time.now
      backup_dirs_allowed_to_delete(Dir.entries(root.to_s)).each do |dir|
        backup = root.join(dir)
        # File.ctime does not keep a file handle open (File.new(...) did).
        next unless now - File.ctime(backup.to_s) > BACKUPS_VALIDITY_TIME

        puts "removing old backup: #{dir}"
        FileUtils.rm_r(backup)
      end
    end

    # Returns the entries that may be pruned: everything except the newest
    # BACKUPS_COUNT ones. Backup names are timestamps, so sorting them
    # lexicographically orders them from oldest to newest; Dir.entries itself
    # guarantees no particular order.
    def self.backup_dirs_allowed_to_delete(entries)
      backups = entries.reject { |entry| %w(. ..).include?(entry) }.sort
      backups.first([backups.size - BACKUPS_COUNT, 0].max)
    end

    protected

    # Name of a new backup: the current local time with second precision.
    def new_backup_name
      Time.now.strftime("%Y-%m-%d_%H-%M-%S")
    end

    # Creates the directory of the new backup and hands it over to the
    # configured user and group.
    def initialize_backup
      @backup_instance_dir = backup_root.join(new_backup_name)
      puts "FileUtils.mkdir_p #{backup_instance_dir}" if verbose
      # Create directory even in dry run to let the script continue.
      FileUtils.mkdir_p(backup_instance_dir)
      puts "FileUtils.chown #{@user} #{@group} #{backup_instance_dir}" if verbose
      FileUtils.chown(@user, @group, backup_instance_dir)
    end

    # Dumps the database in pg_dump's custom format into the backup directory.
    def create_sql_dump
      puts 'Creating SQL dump...'
      Dir.chdir(backup_instance_dir) do
        exec('pg_dump', *pg_user_switch, '-Fc', db_name,
             '-f', backup_instance_dir.join(SQL_DUMP_FILE), user: @user)
      end
    end

    # Archives the data directories, relative to the parent of the data root.
    def create_repository_archive
      puts 'Creating repository archive...'
      Dir.chdir(data_root.join('..')) do
        archive_file = backup_instance_dir.join(REPOSITORY_FILE)
        # Only pass -v when requested; an empty-string argument would be
        # treated by tar as a (non-existent) member name.
        tar_flags = verbose ? %w(-v -cf) : %w(-cf)
        exec('tar', *tar_flags, archive_file.to_s, *@data_dirs, user: @user)
      end
    end

    # Selects the backup to restore; exits with status 1 if it is missing.
    def initialize_restore(backup_name)
      @backup_instance_dir = backup_root.join(backup_name)
      return if Dir.exist?(backup_instance_dir.to_s)

      $stderr.puts(
        "Error: Backup '#{backup_name}' does not exist in #{backup_root}.")
      exit 1
    end

    # Restores the `public` schema of the database from the dump, dropping
    # existing objects first (pg_restore -c).
    def restore_sql_dump
      puts 'Restoring SQL dump...'
      Dir.chdir(backup_instance_dir) do
        exec('pg_restore', '-n', 'public',
             '-c', *pg_user_switch,
             '-d', db_name,
             SQL_DUMP_FILE,
             user: @user)
      end
    end

    # Replaces the data directories by the archived ones. The current
    # directories are parked in a temporary directory, which is only removed
    # after the archive was extracted successfully.
    def restore_repository_archive
      puts 'Restoring repository archive...'
      Dir.chdir(data_root.join('..')) do
        tmpdir = Dir.mktmpdir
        move_data_dirs_to_tmpdir(tmpdir)
        begin
          extract_archive
          remove_tmpdir(tmpdir)
        rescue => e
          puts 'An error occured while restoring the repositories:',
               e.message,
               "You can find the pre-restore repositories at #{tmpdir}",
               'Do something about it.'
          raise
        end
      end
    end

    # Moves the current data directories into `tmpdir`, retrying through sudo
    # if the current user lacks the permissions.
    def move_data_dirs_to_tmpdir(tmpdir)
      puts "FileUtils.mv(#{@data_dirs}, #{tmpdir})" if verbose
      FileUtils.mv(@data_dirs, tmpdir) unless dry_run
    rescue Errno::EACCES
      puts 'As the current user I have no access to move the repository data',
           "directories #{@data_dirs.join(' ')} to a temporary directory #{tmpdir}.",
           'This is used as a backup for the case of an error while restoring.',
           'To continue, I try the command again using sudo.'
      exec('mv', *@data_dirs, tmpdir, user: 'root') unless dry_run
    end

    # Extracts the data directories from the archive of the selected backup.
    def extract_archive
      archive_file = backup_instance_dir.join(REPOSITORY_FILE)
      puts 'Super user privileges are needed to reset the file permissions as',
           'they were before the backup. If you refuse to enter the password',
           '(Ctl-C) or enter a wrong password, only the permissions will not be',
           'restored and all restored files will belong to the current user/group.'
      exec('tar', verbose ? 'vxf' : 'xf', archive_file.to_s, *@data_dirs,
           user: 'root')
    end

    # Deletes the temporary directory holding the pre-restore data, retrying
    # through sudo if the current user lacks the permissions.
    def remove_tmpdir(tmpdir)
      puts "FileUtils.remove_entry(#{tmpdir})" if verbose
      FileUtils.remove_entry(tmpdir) # even do this in dry run
    rescue Errno::EACCES
      puts 'As the current user I have no access to remove the temporary',
           "directory #{tmpdir}.",
           'To continue, I try the command again using sudo.'
      exec('rm', '-r', tmpdir, user: 'root')
    end

    # Creates the maintenance file. Exits with status 1 if it already exists,
    # because another backup/restore may be running.
    def enable_maintenance_mode
      puts 'Enabling maintenance mode...'
      if File.exist?(maintenance_file)
        $stderr.puts 'Maintenance mode was already enabled.'
        $stderr.puts "Please check the file #{maintenance_file}"
        $stderr.puts 'Aborting.'
        exit 1
      end
      puts "FileUtils.touch #{maintenance_file}" if verbose
      FileUtils.touch maintenance_file unless dry_run
    end

    # Removes the maintenance file.
    def disable_maintenance_mode
      puts 'Disabling maintenance mode...'
      puts "FileUtils.rm #{maintenance_file}" if verbose
      FileUtils.rm maintenance_file unless dry_run
    end

    # Execute a command as the given user.
    # user: nil runs it directly, 'root' runs it through the sudo binary and
    # any other name runs it through `sudo -u <user> bash -c`.
    # Does nothing but logging in a dry run. Raises CommandError on failure.
    def exec(*args, user: nil)
      if verbose
        print "[executing next command in #{Dir.getwd}"
        print " as user #{user}" if user
        puts "]"
        puts args.join(' ')
      end
      return if dry_run

      if user == 'root'
        exec_system(sudo, *args.map(&:to_s))
      elsif user
        # This looks strange because of the combination of + and sudo.
        # It is needed on our deployment machines to get the environment right.
        # On other machines, remove the call of +.
        exec_system(sudo, 'sudo', '-u', user, 'bash', '-c',
                    "cd #{Shellwords.escape(Dir.getwd)} && " \
                    "#{escape_arguments(args)}")
      else
        exec_system(*args.map(&:to_s))
      end
    end

    # Binary used to gain super user privileges.
    def sudo
      SUDO_BINARY
    end

    # Runs the command without a shell and returns true on success.
    # Raises CommandError if it could not be started or exited non-zero, so a
    # failed step is never mistaken for a successful one.
    def exec_system(*args)
      # puts args.join(' ') # For debugging
      return true if system(*args)

      raise CommandError, "Command failed: #{args.join(' ')}"
    end

    # Builds a single shell command line from `args` (Strings or Pathnames)
    # in which every argument is escaped for the shell.
    def escape_arguments(args)
      args.map { |arg| Shellwords.escape(arg.to_s) }.join(' ')
    end

    # Path of the maintenance marker file inside the data root.
    def maintenance_file
      data_root.join(MAINTENANCE_FILE)
    end

    # Arguments selecting the database user for pg_dump/pg_restore, if any.
    def pg_user_switch
      sql_dump_as_db_user ? %W(-U #{sql_dump_as_db_user}) : []
    end
  end
end
# Returns the directory that holds the repository data: the value of the
# DATA_ROOT environment variable if it is set, '/data/git' otherwise.
# The rails_root argument is accepted but not used.
def data_root(rails_root)
  ENV.fetch('DATA_ROOT', '/data/git')
end
# Only allow this script to be run with root privileges. The effective uid is
# checked instead of ENV['USER'], which is not reliably updated by su/sudo.
unless Process.euid.zero?
  $stderr.puts 'Running this script as a normal user is disabled.'
  $stderr.puts 'Please run it as root.'
  exit 1
end

# We assume, this script runs in "RAILS_ROOT/script/".
RAILS_ROOT = Pathname.new(__FILE__).dirname.join('..')
BACKUP_ROOT_PRODUCTION = '/data/ontohub_data_backup'
USER = 'ontohub'
GROUP = 'webservd'
DATABASE = 'ontohub'

unless File.exist?(BACKUP_ROOT_PRODUCTION)
  FileUtils.mkdir_p(BACKUP_ROOT_PRODUCTION)
end
BACKUP_ROOT = File.realpath(BACKUP_ROOT_PRODUCTION)
unless File.directory?(BACKUP_ROOT)
  $stderr.puts "Target path is not a directory: #{BACKUP_ROOT}"
  # Nothing below can work without a backup directory.
  exit 1
end

backup = Backup::Backup.new(DATABASE, data_root(RAILS_ROOT), BACKUP_ROOT,
                            sql_dump_as_db_user: 'ontohub',
                            user: USER, group: GROUP,
                            dry_run: false, verbose: true)

case ARGV.first
when 'create'
  backup.create
when 'restore'
  backup_name = ARGV[1]
  if backup_name.nil?
    $stderr.puts(
      'To restore a backup, you need to specify one with the arguments')
    $stderr.puts('"restore backup_name"')
    exit 1
  end
  backup.restore(backup_name)
when 'prune'
  # Pass a Pathname: prune joins the backup names onto the root.
  Backup::Backup.prune(Pathname.new(BACKUP_ROOT))
else
  $stderr.puts 'unknown or missing parameter'
  $stderr.puts 'use parameter "create" or "restore <backup_name>" or "prune"'
  exit 1
end