backup revision 4fbbf628eda08ffdeb98cb41fb7e9001050eec3a
#!/usr/bin/env ruby

# You can find more extensive documentation of this script at
# https://github.com/ontohub/ontohub/blob/staging/doc/backup_and_restore_of_ontohub_data.md

# Description
# This backup script creates and restores backups of ontohub data. It includes:
# * bare git repositories (data/repositories)
# * named symlinks to git repositories (data/git_daemon)
# * the postgres database
#
# Usage
# First note: Run this as the root user, e.g. with sudo.
# To create a backup, run this script with the argument `create`:
# # script/backup create
# Then a backup named with the current date and time is created in the
# backup directory (see below).
#
# To restore a backup, run this script with the argument `restore <backup name>`:
# # script/backup restore 2015-01-01_00-00-00
# Then the selected backup is fully restored.
#
# Backup directory
# For development machines, the backup directory is:
# <rails root>/tmp/backup/
# And for production machines, the backup directory is:
# /home/ontohub/ontohub_data_backup
#
# Super user privileges
# To create and restore, we need root privileges. Otherwise file modes are not
# preserved. This script will call `sudo` when needed and inform you about the
# reason for calling `sudo`. If you don't allow sudo, a backup will be created
# or restored anyway, but the file modes and ownership are not preserved.
# Then, you need to adjust them manually.
#
# Maintenance mode
# While backing up and restoring the data, the maintenance mode is activated.
# This way we guarantee data consistency of the backup.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowerequire 'tmpdir.rb'
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankovrequire 'fileutils'
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowerequire 'pathname'
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowerequire 'open3'
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowemodule Backup
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe class Backup
# Amount of backups that have to be there at least
BACKUPS_COUNT = 30
# Backups are kept for at least 365 days (expressed in seconds)
BACKUPS_VALIDITY_TIME = 365 * 60 * 60 * 24

# Name of the marker file whose presence signals maintenance mode; it is
# looked up inside the data root.
MAINTENANCE_FILE = 'maintenance.txt'.freeze

# File names used inside a single backup directory.
SQL_DUMP_FILE = 'ontohub_sql_dump.postgresql'.freeze
# NOTE(review): the archive is written with `tar -cf` (no `-z`), so despite
# the extension it is not gzip-compressed. The name is kept so that existing
# backups stay restorable.
REPOSITORY_FILE = 'ontohub_repositories.tar.gz'.freeze

# Sub-directories of the data root that are archived and restored.
DATA_DIRS = %w(repositories git_daemon).freeze

# Binary used to run commands with elevated privileges.
# Use 'sudo' on most systems
SUDO_BINARY = '+'.freeze
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe attr_reader :db_name, :data_root, :backup_root, :backup_instance_dir
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe attr_reader :dry_run, :verbose, :sql_dump_as_db_user
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe
# Configure a backup/restore run.
#
# db_name              - database name handed to pg_dump / pg_restore.
# data_root            - directory containing the DATA_DIRS sub-directories.
# backup_root          - directory that holds the individual backups.
# verbose:             - print commands and file operations as they happen.
# dry_run:             - when true (the default), external commands and most
#                        file operations are skipped.
# sql_dump_as_db_user: - database user passed as `-U` to the postgres tools;
#                        nil omits the switch.
# user:                - system user the external commands are run as; nil
#                        runs them as the current user.
def initialize(db_name, data_root, backup_root,
               verbose: false, dry_run: true, sql_dump_as_db_user: nil, user: nil)
  @db_name = db_name
  @backup_root = Pathname.new(backup_root)
  @data_root = Pathname.new(data_root)
  # The archive commands run from the parent of data_root, so the data
  # directories are addressed as "<basename of data_root>/<dir>".
  @data_root_basename = @data_root.basename.to_s
  @data_dirs = DATA_DIRS.map { |dir| File.join(@data_root_basename, dir) }
  @user = user

  @dry_run = dry_run
  @verbose = verbose
  @sql_dump_as_db_user = sql_dump_as_db_user
end
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
# Create a new backup (SQL dump plus repository archive) while maintenance
# mode is enabled, then prune old backups.
#
# Maintenance mode is switched off again even if one of the steps raises;
# otherwise the leftover maintenance file would make every later run abort.
# Any such error is still propagated to the caller.
def create
  puts 'Creating backup...'
  enable_maintenance_mode
  begin
    initialize_backup
    create_sql_dump
    create_repository_archive
    # We needed to create the directory for the script to continue later on.
    Dir.rmdir(backup_instance_dir) if dry_run
  ensure
    disable_maintenance_mode
  end
  puts "Created backup in #{backup_instance_dir}"
  self.class.prune(backup_root)
end
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
# Restore the backup called backup_name (a directory name below the backup
# root): first the SQL dump, then the repository archive.
#
# The backup is looked up before maintenance mode is enabled, because
# initialize_restore exits the script when the backup does not exist and the
# maintenance file would otherwise be left behind.
def restore(backup_name)
  initialize_restore(backup_name)
  enable_maintenance_mode
  restore_sql_dump
  restore_repository_archive
  disable_maintenance_mode
  puts "Restored backup from #{backup_instance_dir}"
end
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
# Delete old backups below backup_root.
#
# backup_root may be a String or a Pathname. Only the entries returned by
# backup_dirs_allowed_to_delete are considered, and of those only the ones
# whose ctime is more than BACKUPS_VALIDITY_TIME seconds in the past are
# removed. Prints a notice to stderr and returns if backup_root is missing.
def self.prune(backup_root)
  # Callers pass either a Pathname or a plain String path.
  backup_root = Pathname.new(backup_root)
  unless Dir.exist?(backup_root.to_s)
    $stderr.puts "Nothing to prune: There is no backup directory."
    return
  end
  now = Time.now
  backup_dirs_allowed_to_delete(Dir.entries(backup_root.to_s)).each do |dir|
    backup = backup_root.join(dir)
    # File.ctime reads the timestamp without keeping a file handle open.
    next unless now - File.ctime(backup.to_s) > BACKUPS_VALIDITY_TIME
    puts "removing old backup: #{dir}"
    FileUtils.rm_r(backup)
  end
end
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov protected
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe
# Name for a new backup, built from the current time in the form
# YYYY-MM-DD_HH-MM-SS.
def new_backup_name
  Time.now.strftime('%Y-%m-%d_%H-%M-%S')
end
# Choose the directory for a new backup below the backup root, remember it
# in @backup_instance_dir and create it on disk.
def initialize_backup
  target = backup_root.join(new_backup_name)
  @backup_instance_dir = target
  puts "FileUtils.mkdir_p #{target}" if verbose
  # Created in dry runs as well: the dump step changes into this directory.
  FileUtils.mkdir_p(target)
end
# Dump the database in pg_dump's custom format (-Fc) into SQL_DUMP_FILE
# inside the current backup directory.
def create_sql_dump
  puts 'Creating SQL dump...'
  dump_command = ['pg_dump', *pg_user_switch, '-Fc', db_name, '-f', SQL_DUMP_FILE]
  # The dump file name is relative, so run from the backup directory.
  Dir.chdir(backup_instance_dir) { exec(*dump_command, user: @user) }
end
# Archive the data directories with tar into REPOSITORY_FILE inside the
# current backup directory. The command runs from the parent of the data
# root because @data_dirs are paths relative to it.
def create_repository_archive
  puts 'Creating repository archive...'
  Dir.chdir(data_root.join('..')) do
    archive_file = backup_instance_dir.join(REPOSITORY_FILE)
    # Only add -v when requested; never hand tar an empty-string argument.
    tar_flags = verbose ? ['-v'] : []
    exec('tar', *tar_flags, '-cf', archive_file.to_s, *@data_dirs,
         user: @user)
  end
end
# Select the backup to restore and remember it in @backup_instance_dir.
# Prints an error and exits with status 1 if no directory named backup_name
# exists below the backup root.
def initialize_restore(backup_name)
  @backup_instance_dir = backup_root.join(backup_name)
  # Dir.exist? instead of Dir.exists?, which Ruby 3.2 removed.
  return if Dir.exist?(backup_instance_dir.to_s)

  $stderr.puts(
    "Error: Backup '#{backup_name}' does not exist in #{backup_root}.")
  exit 1
end
# Load SQL_DUMP_FILE from the current backup directory back into the
# database with pg_restore (restricted to the `public` schema, using -c).
def restore_sql_dump
  # Progress message; it used to be a bare string literal and never showed.
  puts 'Restoring SQL dump...'
  Dir.chdir(backup_instance_dir) do
    exec('pg_restore', '-n', 'public',
         '-c', *pg_user_switch,
         '-d', db_name,
         SQL_DUMP_FILE,
         user: @user)
  end
end
# Replace the data directories with the contents of the backup archive.
#
# The existing data directories are first moved into a fresh temporary
# directory. That directory is removed once extraction has succeeded; if
# anything raises, its location is printed and the error is re-raised.
def restore_repository_archive
  puts 'Restoring repository archive...'
  Dir.chdir(data_root.join('..')) do
    safety_copy = Dir.mktmpdir
    move_data_dirs_to_tmpdir(safety_copy)
    begin
      extract_archive
      remove_tmpdir(safety_copy)
    rescue => failure
      report = [
        'An error occured while restoring the repositories:',
        failure.message,
        "You can find the pre-restore repositories at #{safety_copy}",
        'Do something about it.'
      ]
      puts report.join("\n") + "\n"
      raise
    end
  end
end
# Move the data directories (paths relative to the current directory) into
# tmpdir. The move is skipped in dry runs. When the current user lacks the
# permissions, the same move is retried as root through exec.
def move_data_dirs_to_tmpdir(tmpdir)
  puts "FileUtils.mv(#{@data_dirs}, #{tmpdir})" if verbose
  return if dry_run

  FileUtils.mv(@data_dirs, tmpdir)
rescue Errno::EACCES
  notice = [
    'As the current user I have no access to move the repository data',
    "directories #{@data_dirs.join(' ')} to a temporary directory #{tmpdir}.",
    'This is used as a backup for the case of an error while restoring.',
    'To continue, I try the command again using sudo.'
  ]
  puts notice.join("\n")
  exec('mv', *@data_dirs, tmpdir, user: 'root')
end
# Unpack the data directories from the repository archive of the current
# backup into the current working directory, via try_as_sudo_with_fallback.
def extract_archive
  archive_file = backup_instance_dir.join(REPOSITORY_FILE)
  puts 'Super user privileges are needed to reset the file permissions as',
       'they were before the backup. If you refuse to enter the password',
       '(Ctl-C) or enter a wrong password, only the permissions will not be',
       'restored and all restored files will belong to the current user/group.'
  # Only add -v when requested; never hand tar an empty-string argument.
  tar_flags = verbose ? ['-v'] : []
  try_as_sudo_with_fallback('tar', *tar_flags, '-xf',
                            archive_file.to_s, *@data_dirs)
end
# Delete the temporary directory tmpdir, also in dry runs. When the current
# user lacks the permissions, `rm -r` is retried as root through exec.
def remove_tmpdir(tmpdir)
  puts "FileUtils.remove_entry(#{tmpdir})" if verbose
  FileUtils.remove_entry(tmpdir) # even do this in dry run
rescue Errno::EACCES
  notice = [
    'As the current user I have no access to remove the temporary',
    "directory #{tmpdir}.",
    'To continue, I try the command again using sudo.'
  ]
  puts notice.join("\n")
  exec('rm', '-r', tmpdir, user: 'root')
end
# Enable maintenance mode by creating the maintenance file (skipped in dry
# runs). If the file already exists the script aborts with exit status 1,
# so that calling tools can detect the failure.
def enable_maintenance_mode
  puts 'Enabling maintenance mode...'
  if File.exist?(maintenance_file)
    $stderr.puts 'Maintenance mode was already enabled.'
    $stderr.puts "Please check the file #{maintenance_file}"
    $stderr.puts 'Aborting.'
    exit 1
  end
  puts "FileUtils.touch #{maintenance_file}" if verbose
  FileUtils.touch maintenance_file unless dry_run
end
# Leave maintenance mode by deleting the maintenance file. In dry runs
# nothing is deleted.
def disable_maintenance_mode
  puts 'Disabling maintenance mode...'
  marker = maintenance_file
  puts "FileUtils.rm #{marker}" if verbose
  return if dry_run

  FileUtils.rm marker
end
# Execute a command as the given user.
#
# args  - the command followed by its arguments.
# user: - nil runs the command as the current user, 'root' runs it through
#         the sudo binary, any other name runs it through
#         `<sudo> su - <user> -c "<command line>"`.
#
# Nothing is executed in dry runs (returns nil); otherwise the result of
# Kernel#system is returned.
#
# NOTE(review): `su -` starts a login shell, which presumably does not keep
# the working directory set by the callers' Dir.chdir - confirm that
# relative file names still resolve as intended.
def exec(*args, user: nil)
  command = args.map(&:to_s)
  puts "[executing next command in #{Dir.getwd}]" if verbose
  puts command.join(' ') if verbose
  return if dry_run

  # Pass the words as separate arguments. A single Array argument is read by
  # Kernel#system as [command, argv0] and is rejected unless it has exactly
  # two elements.
  if user == 'root'
    system(sudo, *command)
  elsif user
    system(sudo, 'su', '-', user, '-c', escape_arguments(command))
  else
    system(*command)
  end
end
# Name of the binary that exec puts in front of commands that must run as
# root or as another user.
def sudo
  SUDO_BINARY
end
# Build a single shell command line from args: the command itself is left
# as is, every following argument is wrapped in double quotes. Characters
# that stay special inside double quotes (\ " $ `) are backslash-escaped so
# that an argument cannot break out of its quotes.
def escape_arguments(args)
  command, *rest = args
  quoted = rest.map do |argument|
    %("#{argument.to_s.gsub(/[\\"$`]/) { |char| "\\#{char}" }}")
  end
  [command, *quoted].compact.join(' ')
end
# Path of the maintenance marker file inside the data root.
def maintenance_file
  data_root + MAINTENANCE_FILE
end
# Command line switch selecting the database user for pg_dump/pg_restore:
# ['-U', <user>] when a database user is configured, otherwise [].
def pg_user_switch
  db_user = sql_dump_as_db_user
  return [] unless db_user

  ['-U', "#{db_user}"]
end
# From a directory listing, pick the entries that may be deleted: everything
# except `.`, `..` and the last BACKUPS_COUNT entries in sorted order.
#
# The listing is sorted first because directory listings come in no
# guaranteed order; backup names are timestamps, so sorting puts the oldest
# first and the newest BACKUPS_COUNT backups are the ones kept.
def self.backup_dirs_allowed_to_delete(entries)
  backups = entries.reject { |entry| %w(. ..).include?(entry) }.sort
  backups[0..-(BACKUPS_COUNT + 1)]
end
end
end
# Location of the data directory: on a development system the resolved path
# of <rails_root>/data, otherwise the DATA_ROOT environment variable with
# '/data/git' as fallback.
def data_root(rails_root)
  return File.realpath(rails_root.join('data')) if on_development_system?(rails_root)

  ENV.fetch('DATA_ROOT', '/data/git')
end
# True when <rails_root>/data exists and is not a symlink; the rest of the
# script treats this as the sign of a development system.
def on_development_system?(rails_root)
  candidate = rails_root.join('data')
  return false unless File.exist?(candidate)

  !File.symlink?(candidate)
end
# Only allow this to be run as the root user. The check relies on the USER
# environment variable. Refusing to run is an error, so exit with a
# non-zero status.
unless ENV['USER'] == 'root'
  puts 'Running this script as a normal user is disabled.'
  puts 'Please run it as root.'
  exit 1
end
# We assume, this script runs in "RAILS_ROOT/script/".
RAILS_ROOT = Pathname.new(__FILE__).dirname.join('..')
BACKUP_ROOT_PRODUCTION = '/local/home/ontohub/ontohub_data_backup'
# System user the external commands are run as.
USER = 'ontohub'

# Development and production systems differ in database name, backup
# location and database user; determine once which one this is.
development = on_development_system?(RAILS_ROOT)

DATABASE = development ? 'ontohub_development' : 'ontohub'

# Always a Pathname: Backup.prune calls `join` on it, which a plain String
# path does not support.
BACKUP_ROOT =
  if development
    RAILS_ROOT.join('tmp', 'backup')
  else
    Pathname.new(File.realpath(BACKUP_ROOT_PRODUCTION))
  end

backup = Backup::Backup.new(DATABASE, data_root(RAILS_ROOT), BACKUP_ROOT,
                            sql_dump_as_db_user: development ? 'postgres' : 'ontohub',
                            user: USER,
                            dry_run: false, verbose: true)
# Dispatch on the first command line argument. Usage errors are reported on
# stderr and end the script with exit status 1.
case ARGV.first
when 'create'
  backup.create
when 'restore'
  backup_name = ARGV[1]
  # A missing or empty name must not be joined onto the backup root.
  if backup_name.nil? || backup_name.empty?
    $stderr.puts(
      'To restore a backup, you need to specify one with the arguments')
    $stderr.puts('"restore backup_name"')
    exit 1
  end
  backup.restore(backup_name)
when 'prune'
  Backup::Backup.prune(BACKUP_ROOT)
else
  $stderr.puts 'unknown or missing parameter'
  $stderr.puts 'use parameter "create" or "restore <backup_name>" or "prune"'
  exit 1
end