#!/local/usr/ruby/shims/ruby
# The first line is for deployment machines only. For local machines, use:
#!/usr/bin/env ruby

# You can find more extensive documentation of this script at
# https://github.com/ontohub/ontohub/blob/staging/doc/backup_and_restore_of_ontohub_data.md

# Description
# This backup script creates and restores backups of ontohub data. It includes:
# * bare git repositories (data/repositories)
# * named symlinks to git repositories (data/git_daemon and data/git_ssh)
# * the postgres database
#
# Usage
# First note: Run this as the root user, e.g. with sudo.
# To create a backup, run this script with the argument `create`:
# # script/backup create
# Then a backup named with the current date and time is created in the
# backup directory (see below).
#
# To restore a backup, run this script with the argument `restore <backup name>`:
# # script/backup restore 2015-01-01_00-00-00
# Then the selected backup is fully restored.
#
# Backup directory
# For production machines, the backup directory is:
# /data/ontohub_data_backup
#
# Super user privileges
# To create and restore, we need root privileges. Otherwise file modes are not
# preserved. This script will call `sudo` when needed and inform you about the
# reason for calling `sudo`. If you don't allow sudo, a backup will be created
# or restored anyway, but the file modes and ownership are not preserved.
# Then, you need to adjust them manually.
#
# Maintenance mode
# While backing up and restoring the data, the maintenance mode is activated.
# This way we guarantee data consistency of the backup.


require 'fileutils'
require 'open3'
require 'pathname'
require 'shellwords'
require 'tmpdir.rb'

module Backup
  # Raised when an external command started by Backup cannot be run or exits
  # unsuccessfully.
  class CommandError < StandardError; end

  # Creates, restores and prunes backups of the postgres database and of the
  # repository data directories (DATA_DIRS) located below +data_root+.
  class Backup
    # Amount of backups that have to be there at least
    BACKUPS_COUNT = 30
    # Backups are kept for at least 365 days (value in seconds)
    BACKUPS_VALIDITY_TIME = 365 * 60 * 60 * 24

    MAINTENANCE_FILE = 'maintenance.txt'.freeze

    SQL_DUMP_FILE = 'ontohub_sql_dump.postgresql'.freeze
    # NOTE(review): the archive is written with `tar -cf` (no compression flag)
    # although the name ends in .tar.gz. The name is kept unchanged so that
    # already existing backups are still found.
    REPOSITORY_FILE = 'ontohub_repositories.tar.gz'.freeze

    DATA_DIRS = %w(repositories git_daemon git_ssh).freeze

    # Command that is put in front of commands which have to run as root.
    # Use 'sudo' on most systems
    SUDO_BINARY = '+'.freeze

    attr_reader :db_name, :data_root, :backup_root, :backup_instance_dir
    attr_reader :dry_run, :verbose, :sql_dump_as_db_user

    # db_name::             name of the postgres database
    # data_root::           directory that contains the DATA_DIRS
    # backup_root::         directory in which the backups are stored
    # verbose::             print every command before it is executed
    # dry_run::             only print/simulate, do not touch data (default!)
    # sql_dump_as_db_user:: database user passed to pg_dump/pg_restore (-U)
    # user, group::         owner of the backup directory; +user+ is also the
    #                       system user that runs pg_dump, pg_restore and tar
    def initialize(db_name, data_root, backup_root,
                   verbose: false, dry_run: true, sql_dump_as_db_user: nil,
                   user: nil, group: nil)
      @db_name = db_name
      @backup_root = Pathname.new(backup_root)
      @data_root = Pathname.new(data_root)
      @data_root_basename = @data_root.basename.to_s
      # Paths relative to the parent of data_root, as they appear in the
      # archive.
      @data_dirs = DATA_DIRS.map { |dir| File.join(@data_root_basename, dir) }
      @user = user
      @group = group

      @dry_run = dry_run
      @verbose = verbose
      @sql_dump_as_db_user = sql_dump_as_db_user
    end

    # Creates a new backup named after the current time and prunes old ones.
    # Maintenance mode is switched off again even if a step fails, because
    # creating a backup does not modify the live data.
    # Raises CommandError if pg_dump or tar fail.
    def create
      puts 'Creating backup...'
      enable_maintenance_mode
      begin
        initialize_backup
        create_sql_dump
        create_repository_archive
        # We needed to create the directory for the script to continue later
        # on.
        Dir.rmdir(backup_instance_dir.to_s) if dry_run
      ensure
        disable_maintenance_mode
      end
      puts "Created backup in #{backup_instance_dir}"
      self.class.prune(backup_root, dry_run: dry_run)
    end

    # Restores the backup called +backup_name+ from the backup root.
    # The existence of the backup is checked BEFORE maintenance mode is
    # enabled, so a mistyped name does not leave the maintenance file behind.
    # If a restore step fails, maintenance mode deliberately stays enabled
    # because the data may be in an inconsistent state.
    def restore(backup_name)
      initialize_restore(backup_name)
      enable_maintenance_mode
      restore_sql_dump
      restore_repository_archive
      disable_maintenance_mode
      puts "Restored backup from #{backup_instance_dir}"
    end

    # Removes backups that are older than BACKUPS_VALIDITY_TIME, but always
    # keeps the BACKUPS_COUNT newest ones.
    # backup_root:: String or Pathname of the backup directory
    # dry_run::     when true, only report what would be removed
    def self.prune(backup_root, dry_run: false)
      root = Pathname.new(backup_root)
      unless root.directory?
        $stderr.puts "Nothing to prune: There is no backup directory."
        return
      end
      now = Time.now
      backup_dirs_allowed_to_delete(Dir.entries(root.to_s)).each do |dir|
        backup = root.join(dir).to_s
        next unless now - File.ctime(backup) > BACKUPS_VALIDITY_TIME
        puts "removing old backup: #{dir}"
        FileUtils.rm_r(backup) unless dry_run
      end
    end

    # Returns the entries that may be pruned: everything except '.', '..' and
    # the BACKUPS_COUNT newest backups. Entries are sorted by name first;
    # backup names are timestamps, so name order is chronological order.
    def self.backup_dirs_allowed_to_delete(entries)
      candidates = entries.reject { |entry| %w(. ..).include?(entry) }.sort
      candidates.first([candidates.size - BACKUPS_COUNT, 0].max)
    end

    protected

    # Name of a new backup: the current local time.
    def new_backup_name
      Time.now.strftime("%Y-%m-%d_%H-%M-%S")
    end

    # Creates the directory of the new backup and hands it to user/group.
    def initialize_backup
      @backup_instance_dir = backup_root.join(new_backup_name)
      puts "FileUtils.mkdir_p #{backup_instance_dir}" if verbose
      # Create directory even in dry run to let the script continue.
      FileUtils.mkdir_p(backup_instance_dir.to_s)
      puts "FileUtils.chown #{@user} #{@group} #{backup_instance_dir}" if verbose
      FileUtils.chown(@user, @group, backup_instance_dir.to_s)
    end

    # Dumps the database in pg_dump's custom format into the backup directory.
    def create_sql_dump
      puts 'Creating SQL dump...'
      Dir.chdir(backup_instance_dir.to_s) do
        exec('pg_dump', *pg_user_switch, '-Fc', db_name,
             '-f', backup_instance_dir.join(SQL_DUMP_FILE).to_s, user: @user)
      end
    end

    # Archives the data directories, relative to the parent of data_root.
    def create_repository_archive
      puts 'Creating repository archive...'
      Dir.chdir(data_root.join('..').to_s) do
        archive_file = backup_instance_dir.join(REPOSITORY_FILE)
        # Only add -v when requested; an empty-string argument would be
        # handed to tar as a (non-existent) member name.
        tar_args = ['tar']
        tar_args << '-v' if verbose
        tar_args.push('-cf', archive_file.to_s, *@data_dirs)
        exec(*tar_args, user: @user)
      end
    end

    # Selects the backup to restore; aborts with exit status 1 if it does not
    # exist.
    def initialize_restore(backup_name)
      @backup_instance_dir = backup_root.join(backup_name)
      return if backup_instance_dir.directory?
      abort "Error: Backup '#{backup_name}' does not exist in #{backup_root}."
    end

    # Restores the database from the dump of the selected backup.
    def restore_sql_dump
      puts 'Restoring SQL dump...'
      Dir.chdir(backup_instance_dir.to_s) do
        # pg_restore can report a non-zero status for errors it ignored (for
        # example when -c tries to drop an object that does not exist), so a
        # failure is reported as a warning instead of aborting the restore.
        exec('pg_restore', '-n', 'public',
             '-c', *pg_user_switch,
             '-d', db_name,
             SQL_DUMP_FILE,
             user: @user, allow_failure: true)
      end
    end

    # Replaces the data directories by the ones from the archive. The current
    # directories are moved to a temporary directory first and are only
    # removed after the extraction succeeded.
    def restore_repository_archive
      puts 'Restoring repository archive...'
      Dir.chdir(data_root.join('..').to_s) do
        tmpdir = Dir.mktmpdir
        move_data_dirs_to_tmpdir(tmpdir)
        begin
          extract_archive
          remove_tmpdir(tmpdir)
        rescue => e
          puts <<-MSG

An error occured while restoring the repositories:
#{e.message}
You can find the pre-restore repositories at #{tmpdir}
Do something about it.
          MSG
          raise
        end
      end
    end

    # Moves the current data directories out of the way; retries as root if
    # the current user lacks the permissions.
    def move_data_dirs_to_tmpdir(tmpdir)
      puts "FileUtils.mv(#{@data_dirs}, #{tmpdir})" if verbose
      FileUtils.mv(@data_dirs, tmpdir) unless dry_run
    rescue Errno::EACCES
      puts <<-MSG

As the current user I have no access to move the repository data
directories #{@data_dirs.join(' ')} to a temporary directory #{tmpdir}.
This is used as a backup for the case of an error while restoring.
To continue, I try the command again using sudo.
      MSG
      exec('mv', *@data_dirs, tmpdir, user: 'root') unless dry_run
    end

    # Extracts the repository archive as root. If that fails (e.g. because
    # the privilege elevation was refused), the extraction is retried as the
    # current user, as announced in the message. Raises CommandError if the
    # retry fails as well.
    def extract_archive
      archive_file = backup_instance_dir.join(REPOSITORY_FILE)
      puts <<-MSG

Super user privileges are needed to reset the file permissions as
they were before the backup. If you refuse to enter the password
(Ctl-C) or enter a wrong password, only the permissions will not be
restored and all restored files will belong to the current user/group.
      MSG
      tar_args = ['tar', verbose ? 'vxf' : 'xf', archive_file.to_s, *@data_dirs]
      return if exec(*tar_args, user: 'root', allow_failure: true)
      exec(*tar_args)
    end

    # Removes the temporary directory holding the pre-restore data; retries as
    # root if the current user lacks the permissions.
    def remove_tmpdir(tmpdir)
      puts "FileUtils.remove_entry(#{tmpdir})" if verbose
      FileUtils.remove_entry(tmpdir) # even do this in dry run
    rescue Errno::EACCES
      puts <<-MSG
As the current user I have no access to remove the temporary
directory #{tmpdir}.
To continue, I try the command again using sudo.
      MSG
      exec('rm', '-r', tmpdir, user: 'root')
    end

    # Creates the maintenance file. Aborts with exit status 1 if it already
    # exists, because then another run (or an operator) owns it.
    def enable_maintenance_mode
      puts 'Enabling maintenance mode...'
      if File.exist?(maintenance_file)
        $stderr.puts 'Maintenance mode was already enabled.'
        $stderr.puts "Please check the file #{maintenance_file}"
        $stderr.puts 'Aborting.'
        exit 1
      end
      puts "FileUtils.touch #{maintenance_file}" if verbose
      FileUtils.touch(maintenance_file.to_s) unless dry_run
    end

    # Removes the maintenance file. A file that is already gone is not an
    # error, so this can safely run from an ensure clause.
    def disable_maintenance_mode
      puts 'Disabling maintenance mode...'
      puts "FileUtils.rm #{maintenance_file}" if verbose
      FileUtils.rm_f(maintenance_file.to_s) unless dry_run
    end

    # Execute a command as the given user.
    # user::          nil (current user), 'root', or another system user
    # allow_failure:: when true, a failing command only produces a warning
    # Returns true on success (and always in dry run), false on a tolerated
    # failure. Raises CommandError on a failure that is not tolerated.
    def exec(*args, user: nil, allow_failure: false)
      if verbose
        print "[executing next command in #{Dir.getwd}"
        print " as user #{user}" if user
        puts "]"
        puts args.join(' ')
      end
      return true if dry_run

      command =
        if user == 'root'
          [sudo, *args.map(&:to_s)]
        elsif user
          # This looks strange because of the combination of + and sudo.
          # It is needed on our deployment machines to get the environment
          # right. On other machines, remove the call of +.
          ['+', 'sudo', '-u', user, 'bash', '-c',
           "cd #{Shellwords.escape(Dir.getwd)} && #{escape_arguments(args)}"]
        else
          args.map(&:to_s)
        end
      exec_system(*command, allow_failure: allow_failure)
    end

    def sudo
      SUDO_BINARY
    end

    # Runs the command with Kernel#system and checks its result.
    # Returns true on success. On failure it raises CommandError, or - with
    # allow_failure - prints a warning to stderr and returns false.
    def exec_system(*args, allow_failure: false)
      # puts args.join(' ') # For debugging
      return true if system(*args)

      status = $? && $?.exitstatus
      message = "Command failed (exit status #{status.inspect}): " \
                "#{args.join(' ')}"
      raise CommandError, message unless allow_failure
      $stderr.puts "Warning: #{message}"
      false
    end

    # Joins the arguments into one string that is safe to hand to `bash -c`.
    # Arguments may be Strings or Pathnames.
    def escape_arguments(args)
      args.map { |arg| Shellwords.escape(arg.to_s) }.join(' ')
    end

    def maintenance_file
      data_root.join(MAINTENANCE_FILE)
    end

    # Command line switch selecting the database user, if one is configured.
    def pg_user_switch
      sql_dump_as_db_user ? %W(-U #{sql_dump_as_db_user}) : []
    end
  end
end
# Returns the directory that holds the repository data: the value of the
# DATA_ROOT environment variable, or '/data/git' when it is not set.
# The rails_root argument is accepted but not used.
def data_root(rails_root)
  ENV.fetch('DATA_ROOT', '/data/git')
end
# Only allow this to be run as the root user. The effective user id is
# checked instead of ENV['USER'], which may be unset or set to anything.
unless Process.euid.zero?
  $stderr.puts 'Running this script as a normal user is disabled.'
  $stderr.puts 'Please run it as root.'
  exit 1
end

# We assume, this script runs in "RAILS_ROOT/script/".
RAILS_ROOT = Pathname.new(__FILE__).dirname.join('..')
BACKUP_ROOT_PRODUCTION = '/data/ontohub_data_backup'
USER = 'ontohub'
GROUP = 'webservd'
DATABASE = 'ontohub'

# mkdir_p does nothing if the directory already exists.
FileUtils.mkdir_p(BACKUP_ROOT_PRODUCTION)
BACKUP_ROOT = File.realpath(BACKUP_ROOT_PRODUCTION)

backup = Backup::Backup.new(DATABASE, data_root(RAILS_ROOT), BACKUP_ROOT,
                            sql_dump_as_db_user: 'ontohub',
                            user: USER, group: GROUP,
                            dry_run: false, verbose: true)

case ARGV.first
when 'create'
  backup.create
when 'restore'
  if ARGV.length < 2
    $stderr.puts(
      'To restore a backup, you need to specify one with the arguments')
    $stderr.puts('"restore backup_name"')
    exit 1
  end
  backup.restore(ARGV[1])
when 'prune'
  # prune joins the entry names onto the root, so hand it a Pathname.
  Backup::Backup.prune(Pathname.new(BACKUP_ROOT))
else
  $stderr.puts 'unknown or missing parameter'
  $stderr.puts 'use parameter "create" or "restore <backup_name>" or "prune"'
  exit 1
end