backup revision a9b1a876554e3539bd8e70e6081888c8a0134f22
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# The first line is for deployment machines only. For local machines, use:
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# You can find more extensive documentation of this script at
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# https://github.com/ontohub/ontohub/blob/staging/doc/backup_and_restore_of_ontohub_data.md
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# This backup script creates and restores backups of ontohub data. It includes:
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# * bare git repositories (data/repositories)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# * named symlinks to git repositories (data/git_daemon)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# * the postgres database
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# First note: Run this as the root user, e.g. with sudo.
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# To create a backup, run this script with the argument `create`:
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# Then a backup named with the current date and time is created in the
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# backup directory (see below).
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# To restore a backup, run this script with the argument `restore <backup name>`
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# # script/backup restore 2015-01-01_00-00
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# Then the selected backup is fully restored.
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# Backup directory
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# For development machines, the backup directory is:
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# <rails root>/tmp/backup/
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# And for production machines, the backup directory is:
f16c114c20bbd7d292d93415d1e56c8dd6abd3e7Timo Sirainen# Super user privileges
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# To create and restore, we need root privileges. Otherwise file modes are not
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# preserved. This script will call `sudo` when needed and inform you about the
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# reason for calling `sudo`. If you don't allow sudo, a backup will be created
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen# or restored anyway, but the file modes and ownership are not preserved.
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# Then, you need to adjust them manually.
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# Maintenance mode
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# While backing up and restoring the data, the maintenance mode is activated.
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen# This way we guarantee data consistency of the backup.
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainenrequire 'fileutils'
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainenrequire 'pathname'
98c1cf256927e254f0c092acd2ddcd7ea50bd009Timo Sirainenrequire 'open3'
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen # Minimum number of backups that must always be kept
7a6b45405fb1544ac476e6eb1402a70cc1ddcdcfTimo Sirainen # Backups are kept for at least 365 days
98c1cf256927e254f0c092acd2ddcd7ea50bd009Timo Sirainen REPOSITORY_FILE = 'ontohub_repositories.tar.gz'
98c1cf256927e254f0c092acd2ddcd7ea50bd009Timo Sirainen # Use 'sudo' on most systems
02b79f9c2636da1829eee5b92753602bba8b67edTimo Sirainen attr_reader :db_name, :data_root, :backup_root, :backup_instance_dir
02b79f9c2636da1829eee5b92753602bba8b67edTimo Sirainen attr_reader :dry_run, :verbose, :sql_dump_as_db_user
8153fdec343e40e2a78f5c12353e89b994b28f74Timo Sirainen def initialize(db_name, data_root, backup_root,
02b79f9c2636da1829eee5b92753602bba8b67edTimo Sirainen verbose: false, dry_run: true, sql_dump_as_db_user: nil,
02b79f9c2636da1829eee5b92753602bba8b67edTimo Sirainen @data_root_basename = @data_root.basename.to_s
02b79f9c2636da1829eee5b92753602bba8b67edTimo Sirainen @data_dirs = DATA_DIRS.map { |dir| File.join(@data_root_basename, dir) }
39775ad03c459efe64cce924658da5094ba417e1Timo Sirainen # We needed to create the directory for the script to continue later on.
313fe89df4d91cd0cd7f3558dc6d7fd21ad39eeeTimo Sirainen puts "Created backup in #{backup_instance_dir}"
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen puts "Restored backup from #{backup_instance_dir}"
313fe89df4d91cd0cd7f3558dc6d7fd21ad39eeeTimo Sirainen $stderr.puts "Nothing to prune: There is no backup directory."
4d25408732be27e91f0430f71e87242760c2517cTimo Sirainen backup_dirs_allowed_to_delete(Dir.new(backup_root).entries).each do |dir|
313fe89df4d91cd0cd7f3558dc6d7fd21ad39eeeTimo Sirainen if now - File.new(backup).ctime > BACKUPS_VALIDITY_TIME
9a06cabdfdf4d5e2f19a07e506c3c7d08a7e7038Timo Sirainen @backup_instance_dir = backup_root.join(new_backup_name)
9a06cabdfdf4d5e2f19a07e506c3c7d08a7e7038Timo Sirainen puts "FileUtils.mkdir_p #{backup_instance_dir}" if verbose
9a06cabdfdf4d5e2f19a07e506c3c7d08a7e7038Timo Sirainen # Create directory even in dry run to let the script continue.
9aa52288a4b53186d81b0ec9afa7d9e0a8ee8753Timo Sirainen puts "FileUtils.chown #{@user} #{@group} #{backup_instance_dir}" if verbose
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen FileUtils.chown(@user, @group, backup_instance_dir)
39775ad03c459efe64cce924658da5094ba417e1Timo Sirainen exec('pg_dump', *pg_user_switch, '-Fc', db_name,
39775ad03c459efe64cce924658da5094ba417e1Timo Sirainen '-f', backup_instance_dir.join(SQL_DUMP_FILE), user: @user)
b3febb0933fdce10394d25093e23ce0a5aadddd3Timo Sirainen archive_file = backup_instance_dir.join(REPOSITORY_FILE)
b3febb0933fdce10394d25093e23ce0a5aadddd3Timo Sirainen exec('tar', verbose ? '-v' : '', '-cf', archive_file.to_s, *@data_dirs,
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen @backup_instance_dir = backup_root.join(backup_name)
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen "Error: Backup '#{backup_name}' does not exist in #{backup_root}.")
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo SirainenAn error occured while restoring the repositories:
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo SirainenYou can find the pre-restore repositories at #{tmpdir}
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo SirainenDo something about it.
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen def move_data_dirs_to_tmpdir(tmpdir)
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen puts "FileUtils.mv(#{@data_dirs}, #{tmpdir})" if verbose
b3febb0933fdce10394d25093e23ce0a5aadddd3Timo Sirainen FileUtils.mv(@data_dirs, tmpdir) unless dry_run
39775ad03c459efe64cce924658da5094ba417e1Timo Sirainen rescue Errno::EACCES
313fe89df4d91cd0cd7f3558dc6d7fd21ad39eeeTimo SirainenAs the current user I have no access to move the repository data
313fe89df4d91cd0cd7f3558dc6d7fd21ad39eeeTimo Sirainendirectories #{@data_dirs.join(' ')} to a temporary directory #{tmpdir}.
bb10ebcf076c959c752f583746d83805d7686df8Timo SirainenThis is used as a backup for the case of an error while restoring.
bb10ebcf076c959c752f583746d83805d7686df8Timo SirainenTo continue, I try the command again using sudo.
9aa52288a4b53186d81b0ec9afa7d9e0a8ee8753Timo Sirainen exec('mv', *@data_dirs, tmpdir, user: 'root')
39775ad03c459efe64cce924658da5094ba417e1Timo Sirainen def extract_archive
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen archive_file = backup_instance_dir.join(REPOSITORY_FILE)
39775ad03c459efe64cce924658da5094ba417e1Timo SirainenSuper user privileges are needed to reset the file permissions as
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainenthey were before the backup. If you refuse to enter the password
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen(Ctl-C) or enter a wrong password, only the permissions will not be
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainenrestored and all restored files will belong to the current user/group.
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen exec('tar', verbose ? 'vxf' : 'xf', archive_file.to_s, *@data_dirs,
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen user: 'root')
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen def remove_tmpdir(tmpdir)
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen puts "FileUtils.remove_entry(#{tmpdir})" if verbose
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen FileUtils.remove_entry(tmpdir) # even do this in dry run
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen rescue Errno::EACCES
bb10ebcf076c959c752f583746d83805d7686df8Timo SirainenAs the current user I have no access to remove the temporary
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainendirectory #{tmpdir}.
bb10ebcf076c959c752f583746d83805d7686df8Timo SirainenTo continue, I try the command again using sudo.
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen exec('rm', '-r', tmpdir, user: 'root')
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen def enable_maintenance_mode
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen puts 'Enabling maintenance mode...'
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen if File.exist?(maintenance_file)
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen $stderr.puts 'Maintenance mode was already enabled.'
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen $stderr.puts "Please check the file #{maintenance_file}"
db7c9201c88e3d9bee10485194ee5b0c67249916Timo Sirainen $stderr.puts 'Aborting.'
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen puts "FileUtils.touch #{maintenance_file}" if verbose
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen FileUtils.touch maintenance_file unless dry_run
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen def disable_maintenance_mode
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen puts 'Disabling maintenance mode...'
6843896c40bee4f9b6680ca7ced598c446e9f999Timo Sirainen puts "FileUtils.rm #{maintenance_file}" if verbose
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen FileUtils.rm maintenance_file unless dry_run
a393d9d6dabdc46cf724f8cb004a652b4036d53dTimo Sirainen # Execute a command as the given user.
6843896c40bee4f9b6680ca7ced598c446e9f999Timo Sirainen def exec(*args, user: nil)
a393d9d6dabdc46cf724f8cb004a652b4036d53dTimo Sirainen print "[executing next command in #{Dir.getwd}" if verbose
6843896c40bee4f9b6680ca7ced598c446e9f999Timo Sirainen print " as user #{user}" if verbose && user
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen puts "]" if verbose
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen out = args.join(' ')
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen puts out if verbose
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen if user == 'root'
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen exec_system(*[sudo, *args])
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen # This looks strange because of the double sudo.
a393d9d6dabdc46cf724f8cb004a652b4036d53dTimo Sirainen # It is needed on our deployment machines to get the environment right.
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen exec_system(*['+', 'sudo', '-u', user, 'bash', '-c',
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen "cd #{Dir.getwd} && #{escape_arguments(args)}"])
9a06cabdfdf4d5e2f19a07e506c3c7d08a7e7038Timo Sirainen exec_system(*args)
8153fdec343e40e2a78f5c12353e89b994b28f74Timo Sirainen def exec_system(*args)
7f773564b94e6054a40d3785cb63c29f1e4d4deeTimo Sirainen # puts args.join(' ') # For debugging
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen system(*args)
9a06cabdfdf4d5e2f19a07e506c3c7d08a7e7038Timo Sirainen def escape_arguments(args)
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen rest = args[1..-1].map do |arg|
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen if arg.to_s.include?(' ')
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen %("#{arg.gsub('"', '\"')}")
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen ([args[0]] + rest).join(' ')
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen def maintenance_file
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen data_root.join(MAINTENANCE_FILE)
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen def pg_user_switch
df4018ae2f0a95be602f724ca70df7e0e3bd6a7dTimo Sirainen sql_dump_as_db_user ? %W(-U #{sql_dump_as_db_user}) : []
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen def self.backup_dirs_allowed_to_delete(entries)
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen entries.reject{ |entry| %w(. ..).include?(entry) }[0..-(BACKUPS_COUNT+1)]
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainendef data_root(rails_root)
7797aa2479e99aeb71057b7a2584b2cb72e4d3f8Timo Sirainen if on_development_system?(rails_root)
7797aa2479e99aeb71057b7a2584b2cb72e4d3f8Timo Sirainen File.realpath(rails_root.join('data'))
7797aa2479e99aeb71057b7a2584b2cb72e4d3f8Timo Sirainen ENV['DATA_ROOT'] ||'/data/git'
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainendef on_development_system?(rails_root)
f0f9c8e94abac18f8acd91b9e724c4c32863723aTimo Sirainen data_path = rails_root.join('data')
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen File.exist?(data_path) && !File.symlink?(data_path)
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen# Only allow this script to be run as the root user.
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainenif ENV['USER'] != 'root'
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen puts 'Running this script as a normal user is disabled.'
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen puts 'Please run it as root.'
7f3be7d885c75cdd77f536929a45bc9764595960Timo Sirainen# We assume that this script is located in "RAILS_ROOT/script/".
7f3be7d885c75cdd77f536929a45bc9764595960Timo SirainenRAILS_ROOT = Pathname.new(__FILE__).dirname.join('..')
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo SirainenBACKUP_ROOT_PRODUCTION = '/local/home/ontohub/ontohub_data_backup'
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo SirainenUSER = 'ontohub'
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo SirainenGROUP = 'webservd'
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen if on_development_system?(RAILS_ROOT)
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen 'ontohub_development'
7797aa2479e99aeb71057b7a2584b2cb72e4d3f8Timo Sirainen if on_development_system?(RAILS_ROOT)
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen RAILS_ROOT.join('tmp', 'backup')
4d25408732be27e91f0430f71e87242760c2517cTimo Sirainen File.realpath(BACKUP_ROOT_PRODUCTION)
4d25408732be27e91f0430f71e87242760c2517cTimo Sirainenbackup = Backup::Backup.new(DATABASE, data_root(RAILS_ROOT), BACKUP_ROOT,
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen sql_dump_as_db_user: on_development_system?(RAILS_ROOT) ? 'postgres' : 'ontohub',
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen user: USER, group: GROUP,
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen dry_run: false, verbose: true)
7797aa2479e99aeb71057b7a2584b2cb72e4d3f8Timo Sirainencase ARGV.first
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen backup.create
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainenwhen 'restore'
dda2c506c8fc8ac2f88272de4523ded42baa0aa0Timo Sirainen if ARGV.length == 1
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen $stderr.puts(
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen 'To restore a backup, you need to specify one with the arguments')
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen $stderr.puts('"restore backup_name"')
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen backup_name = ARGV[1]
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen backup.restore(backup_name)
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen Backup::Backup.prune(BACKUP_ROOT)
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen $stderr.puts 'unknown or missing parameter'
bb10ebcf076c959c752f583746d83805d7686df8Timo Sirainen $stderr.puts 'use parameter "create" or "restore <backup_name>" or "prune"'