backup revision 6cdc461aada609d57d50ff675d29b15378717ff2
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# This backup script creates and restores backups of ontohub data. It includes:
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# * bare git repositories (data/repositories)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# * named symlinks to git repositories (data/git_daemon)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# * the postgres database
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# To create a backup, run this script with the argument `create`:
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# Then a backup named with the current date and time is created in the
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# backup directory (see below).
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# To restore a backup, run this script with the argument `restore <backup name>`
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# $ script/backup restore 2015-01-01_00-00
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# Then the selected backup is fully restored
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# Backup directory
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# For development machines, the backup directory is:
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# <rails root>/tmp/backup/
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# And for production machines, the backup directory is:
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# Super user privileges
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# To create and restore, we need root privileges. Otherwise file modes are not
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# preserved. This script will call `sudo` when needed and inform you about the
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# reason for calling `sudo`. If you don't allow sudo, a backup will still be
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# created or restored, but the file modes and ownership will not be preserved.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# In that case, you need to adjust them manually.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# Maintenance mode
5c39d8f041417518a02ce2c941d96c2d33b2a364Mark de Reeper# While backing up and restoring the data, the maintenance mode is activated.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper# This way we guarantee data consistency of the backup.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeperrequire 'fileutils'
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeperrequire 'pathname'
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeperrequire 'open3'
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper # Minimum number of backups that must always be kept
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper # Backups are kept for at least 365 days
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper SQL_DUMP_FILE = 'ontohub_sql_dump.postgresql'
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper REPOSITORY_FILE = 'ontohub_repositories.tar.gz'
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper DATA_DIRS = %w(data/repositories data/git_daemon)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper attr_reader :db_name, :data_root, :backup_root, :backup_instance_dir
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper attr_reader :dry_run, :verbose, :sql_dump_as_postgres_user
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper def initialize(db_name, data_root, backup_root,
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper verbose: false, dry_run: true, sql_dump_as_postgres_user: false)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper @sql_dump_as_postgres_user = sql_dump_as_postgres_user
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper # We needed to create the directory for the script to continue later on.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper puts "Created backup in #{backup_instance_dir}"
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper puts "Restored backup from #{backup_instance_dir}"
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper $stderr.puts "Nothing to prune: There is no backup directory."
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper backup_dirs_allowed_to_delete(Dir.new(backup_root).entries).each do |dir|
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper if now - File.new(backup).ctime > BACKUPS_VALIDITY_TIME
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper @backup_instance_dir = backup_root.join(new_backup_name)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper puts "FileUtils.mkdir_p #{backup_instance_dir}" if verbose
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper # Create directory even in dry run to let the script continue.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper exec('pg_dump', *pg_user_switch, '-Fc', db_name,
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper archive_file = backup_instance_dir.join(REPOSITORY_FILE)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper exec('tar', verbose ? '-v' : '', '-czf', archive_file.to_s,
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper @backup_instance_dir = backup_root.join(backup_name)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper "Error: Backup '#{backup_name}' does not exist in #{backup_root}.")
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper 'Restoring SQL dump...'
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper exec('pg_restore', '-c', *pg_user_switch, '-d', db_name, SQL_DUMP_FILE)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperAn error occured while restoring the repositories:
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperYou can find the pre-restore repositories at #{tmpdir}
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperDo something about it.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper def move_data_dirs_to_tmpdir(tmpdir)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper puts "FileUtils.mv(#{DATA_DIRS}, #{tmpdir})" if verbose
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper FileUtils.mv(DATA_DIRS, tmpdir) unless dry_run
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper rescue Errno::EACCES
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperAs the current user I have no access to move the repository data
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeperdirectories #{DATA_DIRS.join(' ')} to a temporary directory #{tmpdir}.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperThis is used as a backup for the case of an error while restoring.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperTo continue, I try the command again using sudo.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper exec('sudo', 'mv', *DATA_DIRS.map(&:to_s), tmpdir)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper def extract_archive
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper archive_file = backup_instance_dir.join(REPOSITORY_FILE)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperSuper user privileges are needed to reset the file permissions as
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeperthey were before the backup. If you refuse to enter the password
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper(Ctl-C) or enter a wrong password, only the permissions will not be
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeperrestored and all restored files will belong to the current user/group.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper try_as_sudo_with_fallback('tar', verbose ? '-v' : '', '-xzf',
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper archive_file.to_s, *DATA_DIRS.map(&:to_s))
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper def remove_tmpdir(tmpdir)
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper puts "FileUtils.remove_entry(#{tmpdir})" if verbose
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper FileUtils.remove_entry(tmpdir) # even do this in dry run
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper rescue Errno::EACCES
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperAs the current user I have no access to remove the temporary
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeperdirectory #{tmpdir}.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de ReeperTo continue, I try the command again using sudo.
6406210b71fd4a97800f32f3613eea9b6a6a12ceMark de Reeper exec('sudo', 'rm', '-r', tmpdir)