Build a comprehensive backup verification framework that automatically tests database restores, validates filesystem backups, and performs recovery testing using systemd timers and custom scripts.
Prerequisites
- Root access to Linux server
- PostgreSQL and/or MySQL installed
- Existing backup infrastructure
- Basic understanding of systemd
What this solves
Backups without verification are useless. This tutorial sets up automated testing that validates your PostgreSQL and MySQL backups actually restore correctly, checks filesystem backup integrity, and performs regular recovery testing. You'll build a framework that catches corrupted backups before you need them in an emergency.
Step-by-step installation
Install required packages
Install backup tools, database clients, and utilities for integrity checking and compression.
# Refresh the package index, then install the database clients and helper tools.
# The Debian/Ubuntu mail client package is named "mailutils"; "mail-utils" does
# not exist and would make the whole apt transaction fail.
sudo apt update
sudo apt install -y postgresql-client mysql-client rsync gzip pigz pv jq bc mailutils
Create backup verification directory structure
Set up organized directories for scripts, logs, temporary files, and test data.
# Working tree for the framework: scripts, logs, scratch space and configuration.
sudo mkdir -p /opt/backup-verify/{scripts,logs,temp,config}
# Directory the verification scripts write their timestamped log files to.
sudo mkdir -p /var/log/backup-verify
# System account (-r) whose home directory is the framework tree.
sudo useradd -r -s /bin/bash -d /opt/backup-verify backup-verify
# Hand both trees to the service account.
sudo chown -R backup-verify:backup-verify /opt/backup-verify
sudo chown backup-verify:backup-verify /var/log/backup-verify
Create PostgreSQL backup verification script
This script finds recent PostgreSQL dumps in the local backup directory, restores each one into a scratch test database, and validates that tables were actually restored.
#!/bin/bash
# PostgreSQL Backup Verification Script
#
# Restores every recent dump found in BACKUP_DIR into a scratch database and
# checks that the restore produced tables.
set -euo pipefail

# Configuration
BACKUP_DIR="/backups/postgresql"
TEST_DB="backup_verify_test"
LOG_FILE="/var/log/backup-verify/postgresql-$(date +%Y%m%d_%H%M%S).log"
MAX_AGE_HOURS=48

# Database connection settings (each one may be overridden from the environment)
PG_HOST="${PG_HOST:-localhost}"
PG_PORT="${PG_PORT:-5432}"
PG_USER="${PG_USER:-postgres}"
PG_PASSWORD="${PG_PASSWORD:-}"
# Emit one timestamped line to stdout and append the same line to $LOG_FILE.
# Arguments: $1 - message text
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s - %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
# Best-effort teardown: drop the scratch database and remove pg_verify_* temp
# entries under /tmp. Registered as the EXIT handler below.
cleanup() {
  log "Cleaning up test database"
  # A dropdb failure is deliberately ignored so teardown never aborts the script.
  PGPASSWORD="$PG_PASSWORD" dropdb \
    --host="$PG_HOST" \
    --port="$PG_PORT" \
    --username="$PG_USER" \
    --if-exists "$TEST_DB" 2>/dev/null || true
  rm -rf /tmp/pg_verify_*
}

trap cleanup EXIT
# Verify one PostgreSQL dump by restoring it into the scratch database.
#
# This function is called from an `if`, where `set -e` is ignored, so every
# step that matters is checked explicitly instead of relying on errexit.
#
# Globals:   MAX_AGE_HOURS, TEST_DB, PG_HOST, PG_PORT, PG_USER, PG_PASSWORD (read)
# Arguments: $1 - path to a plain-SQL dump, optionally gzip-compressed (*.gz)
# Returns:   0 if the dump is recent, intact, restores without error and
#            yields at least one table in schema "public"; 1 otherwise
verify_backup() {
  local backup_file="$1"
  local backup_name
  backup_name=$(basename "$backup_file")
  local -a conn=(-h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER")
  log "Verifying backup: $backup_name"

  # Check file age
  local mtime file_age max_age
  if ! mtime=$(stat -c %Y "$backup_file"); then
    log "ERROR: Cannot read modification time of $backup_file"
    return 1
  fi
  file_age=$(( $(date +%s) - mtime ))
  max_age=$(( MAX_AGE_HOURS * 3600 ))
  if (( file_age > max_age )); then
    log "ERROR: Backup file is older than $MAX_AGE_HOURS hours"
    return 1
  fi

  # Check file integrity
  if [[ "$backup_file" == *.gz ]]; then
    if ! gzip -t "$backup_file"; then
      log "ERROR: Backup file is corrupted (gzip test failed)"
      return 1
    fi
  fi

  # Create test database
  log "Creating test database: $TEST_DB"
  if ! PGPASSWORD="$PG_PASSWORD" createdb "${conn[@]}" "$TEST_DB"; then
    log "ERROR: Could not create test database: $TEST_DB"
    return 1
  fi

  # Restore backup. ON_ERROR_STOP makes psql exit non-zero on the first SQL
  # error; without it psql reports success even when statements fail.
  # NOTE(review): dumps that reference roles missing on the test server will
  # now fail here; take them with --no-owner/--no-privileges or create the roles.
  log "Restoring backup to test database"
  local -a restore_cmd=(psql "${conn[@]}" -d "$TEST_DB" -q -v ON_ERROR_STOP=1)
  if [[ "$backup_file" == *.gz ]]; then
    if ! gunzip -c "$backup_file" | PGPASSWORD="$PG_PASSWORD" "${restore_cmd[@]}"; then
      log "ERROR: Restore failed for $backup_name"
      return 1
    fi
  else
    if ! PGPASSWORD="$PG_PASSWORD" "${restore_cmd[@]}" -f "$backup_file"; then
      log "ERROR: Restore failed for $backup_name"
      return 1
    fi
  fi

  # Validate restored data
  log "Validating restored data"
  local table_count
  if ! table_count=$(PGPASSWORD="$PG_PASSWORD" psql "${conn[@]}" -d "$TEST_DB" -t \
    -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';"); then
    log "ERROR: Could not query restored database"
    return 1
  fi
  table_count=${table_count//[[:space:]]/}   # psql -t pads the value with blanks
  if ! [[ "$table_count" =~ ^[0-9]+$ ]]; then
    log "ERROR: Unexpected table count result: '$table_count'"
    return 1
  fi
  if (( table_count == 0 )); then
    log "WARNING: No tables found in restored database"
    return 1
  fi

  log "SUCCESS: Backup verified - $table_count tables restored"
  return 0
}
# Verify every recent PostgreSQL dump in BACKUP_DIR and summarize the outcome.
#
# Globals: BACKUP_DIR (read)
# Exits:   1 when BACKUP_DIR is missing, when no recent dump is found, or when
#          every dump fails verification; otherwise falls through with 0.
# NOTE(review): a run where only some dumps fail still ends with status 0.
main() {
  log "Starting PostgreSQL backup verification"

  if [[ ! -d "$BACKUP_DIR" ]]; then
    log "ERROR: Backup directory not found: $BACKUP_DIR"
    exit 1
  fi

  local success_count=0
  local total_count=0
  local backup_file

  # Find recent backup files. The -name tests are grouped so that -type,
  # -mtime and -print0 apply to both patterns. Counters use plain assignment
  # because ((n++)) returns status 1 when n is 0 and would trip `set -e`.
  while IFS= read -r -d '' backup_file; do
    total_count=$((total_count + 1))
    if verify_backup "$backup_file"; then
      success_count=$((success_count + 1))
    fi
    cleanup
  done < <(find "$BACKUP_DIR" -type f \( -name "*.sql" -o -name "*.sql.gz" \) -mtime -2 -print0)

  log "Verification complete: $success_count/$total_count backups verified successfully"

  if (( total_count == 0 )); then
    log "WARNING: No recent backups found"
    exit 1
  elif (( success_count == 0 )); then
    log "CRITICAL: All backup verifications failed"
    exit 1
  fi
}
main "$@"
Create MySQL backup verification script
Similar to PostgreSQL verification but handles MySQL-specific restore procedures and validation.
#!/bin/bash
# MySQL Backup Verification Script
#
# Restores every recent dump found in BACKUP_DIR into a scratch database and
# checks that the restore produced healthy tables.
set -euo pipefail

# Configuration
BACKUP_DIR="/backups/mysql"
TEST_DB="backup_verify_test"
LOG_FILE="/var/log/backup-verify/mysql-$(date +%Y%m%d_%H%M%S).log"
MAX_AGE_HOURS=48

# Database connection settings (each one may be overridden from the environment)
MY_HOST="${MY_HOST:-localhost}"
MY_PORT="${MY_PORT:-3306}"
MY_USER="${MY_USER:-root}"
MY_PASSWORD="${MY_PASSWORD:-}"
# Write a timestamped message to stdout and append it to $LOG_FILE.
# Arguments: $1 - message text
log() {
  local now
  now=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s\n' "${now} - $1" | tee -a "$LOG_FILE"
}
# Best-effort teardown: drop the scratch database and remove my_verify_* temp
# entries under /tmp. Registered as the EXIT handler below.
cleanup() {
  log "Cleaning up test database"
  # The database name is an identifier, so it is wrapped in backticks; the
  # previous "\$TEST_DB\;" sent the literal text $TEST_DB\; to the server.
  # Errors are ignored on purpose so teardown never aborts the script.
  mysql -h "$MY_HOST" -P "$MY_PORT" -u "$MY_USER" -p"$MY_PASSWORD" \
    -e "DROP DATABASE IF EXISTS \`$TEST_DB\`;" 2>/dev/null || true
  rm -rf /tmp/my_verify_*
}

trap cleanup EXIT
# Verify one MySQL dump by restoring it into the scratch database.
#
# This function is called from an `if`, where `set -e` is ignored, so every
# step that matters is checked explicitly instead of relying on errexit.
#
# Globals:   MAX_AGE_HOURS, TEST_DB, MY_HOST, MY_PORT, MY_USER, MY_PASSWORD (read)
# Arguments: $1 - path to a SQL dump, optionally gzip-compressed (*.gz)
# Returns:   0 if the dump is recent, intact, restores without error, yields
#            at least one table and CHECK TABLE reports no errors; 1 otherwise
verify_backup() {
  local backup_file="$1"
  local backup_name
  backup_name=$(basename "$backup_file")
  local -a my_args=(-h "$MY_HOST" -P "$MY_PORT" -u "$MY_USER" -p"$MY_PASSWORD")
  log "Verifying backup: $backup_name"

  # Check file age
  local mtime file_age max_age
  if ! mtime=$(stat -c %Y "$backup_file"); then
    log "ERROR: Cannot read modification time of $backup_file"
    return 1
  fi
  file_age=$(( $(date +%s) - mtime ))
  max_age=$(( MAX_AGE_HOURS * 3600 ))
  if (( file_age > max_age )); then
    log "ERROR: Backup file is older than $MAX_AGE_HOURS hours"
    return 1
  fi

  # Check file integrity
  if [[ "$backup_file" == *.gz ]]; then
    if ! gzip -t "$backup_file"; then
      log "ERROR: Backup file is corrupted (gzip test failed)"
      return 1
    fi
  fi

  # Create test database (identifier quoted with backticks)
  log "Creating test database: $TEST_DB"
  if ! mysql "${my_args[@]}" -e "CREATE DATABASE IF NOT EXISTS \`$TEST_DB\`;"; then
    log "ERROR: Could not create test database: $TEST_DB"
    return 1
  fi

  # Restore backup
  log "Restoring backup to test database"
  if [[ "$backup_file" == *.gz ]]; then
    if ! gunzip -c "$backup_file" | mysql "${my_args[@]}" "$TEST_DB"; then
      log "ERROR: Restore failed for $backup_name"
      return 1
    fi
  else
    if ! mysql "${my_args[@]}" "$TEST_DB" < "$backup_file"; then
      log "ERROR: Restore failed for $backup_name"
      return 1
    fi
  fi

  # Validate restored data
  log "Validating restored data"
  local table_count
  if ! table_count=$(mysql "${my_args[@]}" -s -N \
    -e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '$TEST_DB';"); then
    log "ERROR: Could not query restored database"
    return 1
  fi
  if ! [[ "$table_count" =~ ^[0-9]+$ ]]; then
    log "ERROR: Unexpected table count result: '$table_count'"
    return 1
  fi
  if (( table_count == 0 )); then
    log "WARNING: No tables found in restored database"
    return 1
  fi

  # Check for common corruption indicators: run CHECK TABLE over every table
  # and count result rows whose Msg_type column (third field) is "error".
  local table_names table_list="" tbl
  if ! table_names=$(mysql "${my_args[@]}" "$TEST_DB" -s -N -e 'SHOW TABLES'); then
    log "ERROR: Could not list tables in restored database"
    return 1
  fi
  while IFS= read -r tbl; do
    [[ -n "$tbl" ]] || continue
    table_list+="\`${tbl}\`,"
  done <<<"$table_names"
  table_list=${table_list%,}

  local check_output corrupt_tables
  if ! check_output=$(mysql "${my_args[@]}" "$TEST_DB" -s -N -e "CHECK TABLE ${table_list}" 2>/dev/null); then
    log "ERROR: CHECK TABLE could not be executed"
    return 1
  fi
  corrupt_tables=$(awk -F'\t' 'tolower($3) == "error" { n++ } END { print n + 0 }' <<<"$check_output")
  if (( corrupt_tables > 0 )); then
    log "ERROR: Found $corrupt_tables corrupted tables in backup"
    return 1
  fi

  log "SUCCESS: Backup verified - $table_count tables restored"
  return 0
}
# Verify every recent MySQL dump in BACKUP_DIR and summarize the outcome.
#
# Globals: BACKUP_DIR (read)
# Exits:   1 when BACKUP_DIR is missing, when no recent dump is found, or when
#          every dump fails verification; otherwise falls through with 0.
# NOTE(review): a run where only some dumps fail still ends with status 0.
main() {
  log "Starting MySQL backup verification"

  if [[ ! -d "$BACKUP_DIR" ]]; then
    log "ERROR: Backup directory not found: $BACKUP_DIR"
    exit 1
  fi

  local success_count=0
  local total_count=0
  local backup_file

  # Find recent backup files. The -name tests are grouped so that -type,
  # -mtime and -print0 apply to both patterns. Counters use plain assignment
  # because ((n++)) returns status 1 when n is 0 and would trip `set -e`.
  while IFS= read -r -d '' backup_file; do
    total_count=$((total_count + 1))
    if verify_backup "$backup_file"; then
      success_count=$((success_count + 1))
    fi
    cleanup
  done < <(find "$BACKUP_DIR" -type f \( -name "*.sql" -o -name "*.sql.gz" \) -mtime -2 -print0)

  log "Verification complete: $success_count/$total_count backups verified successfully"

  if (( total_count == 0 )); then
    log "WARNING: No recent backups found"
    exit 1
  elif (( success_count == 0 )); then
    log "CRITICAL: All backup verifications failed"
    exit 1
  fi
}
main "$@"
Create filesystem backup verification script
Validates filesystem backups by checking archive integrity, file counts, and performing sample restorations.
#!/bin/bash
# Filesystem Backup Verification Script
#
# Checks tar archives and directory-style backups found directly under
# BACKUP_DIR.
set -euo pipefail

# Configuration
BACKUP_DIR="/backups/filesystem"
TEST_RESTORE_DIR="/tmp/backup_verify_restore"
LOG_FILE="/var/log/backup-verify/filesystem-$(date +%Y%m%d_%H%M%S).log"
MAX_AGE_HOURS=48
# Upper bound on how many entries are sampled per backup
SAMPLE_RESTORE_COUNT=5
# Timestamped logger: prints the line and appends it to $LOG_FILE.
# Arguments: $1 - message text
log() {
  local line
  line="$(date '+%Y-%m-%d %H:%M:%S') - $1"
  printf '%s\n' "$line" | tee -a "$LOG_FILE"
}
# Remove the scratch restore directory. Registered as the EXIT handler below
# and also called by main() after each backup.
cleanup() {
  local scratch="$TEST_RESTORE_DIR"
  log "Cleaning up test restore directory"
  rm -rf "$scratch"
}

trap cleanup EXIT
# Verify a tar archive: list it end to end, make sure it is not empty and
# extract a small sample of its members into TEST_RESTORE_DIR.
#
# Globals:   TEST_RESTORE_DIR, SAMPLE_RESTORE_COUNT (read)
# Arguments: $1 - path to a .tar, .tar.gz/.tgz or .tar.bz2 archive
# Returns:   0 when the archive lists cleanly and has at least one member,
#            1 otherwise. Sample extraction stays best-effort.
verify_tar_backup() {
  local backup_file="$1"
  local backup_name
  backup_name=$(basename "$backup_file")
  log "Verifying tar backup: $backup_name"

  # Pick the compression flags once instead of repeating the suffix test in
  # every step (the previous patterns had lost their leading "*").
  local list_flags extract_flags
  case "$backup_file" in
    *.gz | *.tgz)
      list_flags="-tzf"
      extract_flags="-xzf"
      ;;
    *.bz2)
      list_flags="-tjf"
      extract_flags="-xjf"
      ;;
    *)
      list_flags="-tf"
      extract_flags="-xf"
      ;;
  esac

  # Test archive integrity: one full listing serves as the integrity test,
  # the member count and the source of sample names.
  local listing
  if ! listing=$(tar "$list_flags" "$backup_file" 2>/dev/null); then
    log "ERROR: Archive integrity test failed"
    return 1
  fi

  # Count files in archive
  local -a members=()
  if [[ -n "$listing" ]]; then
    mapfile -t members <<<"$listing"
  fi
  local file_count=${#members[@]}
  if (( file_count == 0 )); then
    log "ERROR: Archive appears to be empty"
    return 1
  fi

  # Perform sample restoration
  mkdir -p "$TEST_RESTORE_DIR"
  log "Performing sample restoration of $SAMPLE_RESTORE_COUNT files"
  local i member
  for (( i = 0; i < file_count && i < SAMPLE_RESTORE_COUNT; i++ )); do
    member=${members[i]}
    # Directory entries end in "/" and are skipped.
    [[ -n "$member" && "$member" != */ ]] || continue
    if ! tar "$extract_flags" "$backup_file" -C "$TEST_RESTORE_DIR" "$member" 2>/dev/null; then
      log "WARNING: Could not extract sample member: $member"
    fi
  done

  local restored_count
  restored_count=$(find "$TEST_RESTORE_DIR" -type f | wc -l)
  log "Sample restoration completed: $restored_count files restored"
  log "SUCCESS: Archive verified - $file_count files in archive"
  return 0
}
# Verify a directory-style (rsync) backup: it must exist, contain files and
# the sampled files must be readable. An optional ".backup_manifest" is
# checked with sha256sum; a manifest mismatch is reported as a warning only.
#
# Globals:   SAMPLE_RESTORE_COUNT (read)
# Arguments: $1 - backup directory
# Returns:   0 on success, 1 if the directory is missing, empty or a sampled
#            file cannot be read
verify_rsync_backup() {
  local backup_dir="$1"
  local backup_name
  backup_name=$(basename "$backup_dir")
  log "Verifying rsync backup: $backup_name"

  # Check if directory exists and has content
  if [[ ! -d "$backup_dir" ]]; then
    log "ERROR: Backup directory does not exist"
    return 1
  fi

  local file_count
  file_count=$(find "$backup_dir" -type f | wc -l)
  if (( file_count == 0 )); then
    log "ERROR: Backup directory is empty"
    return 1
  fi

  # Check for rsync log or manifest
  local manifest_file="$backup_dir/.backup_manifest"
  if [[ -f "$manifest_file" ]]; then
    log "Found backup manifest, validating file checksums"
    if ! (cd "$backup_dir" && sha256sum -c ".backup_manifest" 2>/dev/null); then
      log "WARNING: Some files failed checksum verification"
    else
      log "All files passed checksum verification"
    fi
  fi

  # Sample file accessibility test. The loop reads from process substitution
  # so the counters live in this shell; previously they were incremented in a
  # pipeline subshell and the result was silently thrown away.
  local sampled=0
  local accessible_count=0
  local sample
  while IFS= read -r -d '' sample; do
    (( sampled < SAMPLE_RESTORE_COUNT )) || break
    sampled=$((sampled + 1))
    if [[ -r "$sample" ]]; then
      accessible_count=$((accessible_count + 1))
    fi
  done < <(find "$backup_dir" -type f -print0)

  if (( accessible_count < sampled )); then
    log "ERROR: $((sampled - accessible_count)) of $sampled sampled files are not readable"
    return 1
  fi

  log "SUCCESS: Directory backup verified - $file_count files found"
  return 0
}
# Dispatch one backup path to the matching verifier after an age check.
#
# Globals:   MAX_AGE_HOURS (read)
# Arguments: $1 - path to a tar archive or to a backup directory
# Returns:   the verifier's status; 1 for stale, unknown or missing paths
verify_backup() {
  local backup_path="$1"
  local backup_name
  backup_name=$(basename "$backup_path")

  # Check file/directory age
  local mtime file_age max_age
  if ! mtime=$(stat -c %Y "$backup_path"); then
    log "ERROR: Backup path is neither file nor directory: $backup_path"
    return 1
  fi
  file_age=$(( $(date +%s) - mtime ))
  max_age=$(( MAX_AGE_HOURS * 3600 ))
  if (( file_age > max_age )); then
    log "ERROR: Backup is older than $MAX_AGE_HOURS hours"
    return 1
  fi

  if [[ -f "$backup_path" ]]; then
    # File-based backup (tar, etc.). The previous pattern had lost its
    # wildcards and compared against the literal string ".tar", so every
    # archive was rejected as an unknown type.
    case "$backup_path" in
      *.tar* | *.tgz)
        verify_tar_backup "$backup_path"
        ;;
      *)
        log "WARNING: Unknown file type, skipping: $backup_name"
        return 1
        ;;
    esac
  elif [[ -d "$backup_path" ]]; then
    # Directory-based backup (rsync, etc.)
    verify_rsync_backup "$backup_path"
  else
    log "ERROR: Backup path is neither file nor directory: $backup_path"
    return 1
  fi
}
# Verify every recent archive and backup directory directly under BACKUP_DIR.
#
# Globals: BACKUP_DIR (read)
# Exits:   1 when BACKUP_DIR is missing, when nothing recent is found, or when
#          every candidate fails verification; otherwise falls through with 0.
# NOTE(review): a run where only some backups fail still ends with status 0.
main() {
  log "Starting filesystem backup verification"

  if [[ ! -d "$BACKUP_DIR" ]]; then
    log "ERROR: Backup directory not found: $BACKUP_DIR"
    exit 1
  fi

  local success_count=0
  local total_count=0
  local backup_path

  # Find recent backup files and directories.
  #  * -mindepth 1 keeps BACKUP_DIR itself from being treated as a backup.
  #  * the archive patterns carry their wildcards again.
  #  * counters use plain assignment because ((n++)) returns status 1 when n
  #    is 0 and would trip `set -e`.
  while IFS= read -r -d '' backup_path; do
    total_count=$((total_count + 1))
    if verify_backup "$backup_path"; then
      success_count=$((success_count + 1))
    fi
    cleanup
  done < <(find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 \
    \( \( -type f \( -name "*.tar*" -o -name "*.tgz" \) \) -o -type d \) \
    -mtime -2 -print0)

  log "Verification complete: $success_count/$total_count backups verified successfully"

  if (( total_count == 0 )); then
    log "WARNING: No recent backups found"
    exit 1
  elif (( success_count == 0 )); then
    log "CRITICAL: All backup verifications failed"
    exit 1
  fi
}
main "$@"
Create master verification script
Orchestrates all backup verifications and provides centralized reporting and alerting.
#!/bin/bash
# Master Backup Verification Script
#
# Runs the individual verification scripts and reports on their results.
set -euo pipefail

# Configuration
SCRIPT_DIR="/opt/backup-verify/scripts"
LOG_DIR="/var/log/backup-verify"
CONFIG_FILE="/opt/backup-verify/config/verify.conf"
REPORT_FILE="$LOG_DIR/master-report-$(date +%Y%m%d_%H%M%S).json"
EMAIL_RECIPIENT="${EMAIL_RECIPIENT:-admin@example.com}"
SMTP_SERVER="${SMTP_SERVER:-localhost}"

# Load configuration if exists (values set there replace the defaults above)
if [ -f "$CONFIG_FILE" ]; then
  # shellcheck disable=SC1090 -- path is only known at run time
  source "$CONFIG_FILE"
fi
# Print a timestamped message to stdout (the master script does not tee to a file).
# Arguments: $1 - message text
log() {
  local when
  when=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s - %s\n' "$when" "$1"
}
# Mail an alert to $EMAIL_RECIPIENT when a `mail` command is available;
# otherwise only log a warning.
# Arguments: $1 - subject line
#            $2 - message body
send_alert() {
  local subject="$1"
  local message="$2"

  # Guard clause: without a mail client there is nothing to send.
  if ! command -v mail >/dev/null 2>&1; then
    log "WARNING: mail command not available, cannot send alert"
    return
  fi

  echo "$message" | mail -s "$subject" "$EMAIL_RECIPIENT"
  log "Alert sent: $subject"
}
# Run one verification script from $SCRIPT_DIR and record how it went.
# Arguments: $1 - file name of the script inside $SCRIPT_DIR
# Returns:   1 when the script is missing or not executable (visible below).
# NOTE(review): the text after "Store results in JSON format" is damaged. The
# heredoc that built the JSON result, the end of this function and the start
# of main() are missing, and what remains is the tail of a `find ... -delete`
# log-retention command. Restore this span from the original script before
# use; as printed it is not valid shell.
run_verification() {
local script_name="$1"
local script_path="$SCRIPT_DIR/$script_name"
local start_time=$(date +%s)
# Refuse to run anything that is absent or lacks the execute bit.
if [ ! -f "$script_path" ]; then
log "ERROR: Verification script not found: $script_path"
return 1
fi
if [ ! -x "$script_path" ]; then
log "ERROR: Verification script not executable: $script_path"
return 1
fi
log "Running verification: $script_name"
local output
local exit_code
# Capture stdout and stderr together; running inside `if` keeps a failing
# child from aborting this script under `set -e`.
if output=$("$script_path" 2>&1); then
exit_code=0
log "SUCCESS: $script_name completed successfully"
else
exit_code=$?
log "FAILED: $script_name failed with exit code $exit_code"
fi
local end_time=$(date +%s)
local duration=$((end_time - start_time))
# Store results in JSON format
local result=$(cat < "$REPORT_FILE" <.log" -o -name ".json" -mtime +30 -delete 2>/dev/null || true
log "Master verification completed"
}
main "$@"
Set up database connection configuration
Create secure configuration files for database connections with proper permissions.
# Backup Verification Configuration
# This file is sourced by the verification scripts, so every line must be
# valid shell: headings are comments, settings are exported variables.

# PostgreSQL Settings
export PG_HOST="localhost"
export PG_PORT="5432"
export PG_USER="backup_verify"
export PG_PASSWORD="your_secure_password_here"

# MySQL Settings
export MY_HOST="localhost"
export MY_PORT="3306"
export MY_USER="backup_verify"
export MY_PASSWORD="your_secure_password_here"

# Email Settings
export EMAIL_RECIPIENT="admin@example.com"
export SMTP_SERVER="localhost"

# Backup Directory Overrides (optional)
export POSTGRESQL_BACKUP_DIR="/custom/postgresql/backups"
export MYSQL_BACKUP_DIR="/custom/mysql/backups"
export FILESYSTEM_BACKUP_DIR="/custom/filesystem/backups"
sudo chown backup-verify:backup-verify /opt/backup-verify/config/verify.conf
sudo chmod 600 /opt/backup-verify/config/verify.conf
Create database users for verification
Set up dedicated database users with minimal required privileges for backup testing.
# For PostgreSQL
# The verification scripts run createdb/dropdb as this role, so it needs the
# CREATEDB attribute; with --no-createdb every verification run fails at
# "Creating test database".
sudo -u postgres createuser --createdb --no-createrole --no-superuser backup_verify
sudo -u postgres psql -c "ALTER USER backup_verify WITH PASSWORD 'your_secure_password_here';"
# NOTE(review): this grant only allows creating schemas inside the "postgres"
# database; it is not what permits createdb. Kept unchanged from the original.
sudo -u postgres psql -c "GRANT CREATE ON DATABASE postgres TO backup_verify;"
# For MySQL
mysql -u root -p <. TO 'backup_verify'@'localhost';
FLUSH PRIVILEGES;
EOF
Make scripts executable
Set proper permissions and ownership for all verification scripts.
sudo chmod +x /opt/backup-verify/scripts/*.sh
sudo chown -R backup-verify:backup-verify /opt/backup-verify/scripts/
Create systemd service for backup verification
Set up a systemd service that runs the verification framework with proper logging and error handling.
[Unit]
Description=Backup Verification Service
After=network.target postgresql.service mysql.service
Wants=postgresql.service mysql.service
[Service]
Type=oneshot
User=backup-verify
Group=backup-verify
WorkingDirectory=/opt/backup-verify
ExecStart=/opt/backup-verify/scripts/master-verify.sh
StandardOutput=journal
StandardError=journal
SyslogIdentifier=backup-verify
# Security settings
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/opt/backup-verify /var/log/backup-verify /tmp
PrivateTmp=true
# Resource limits
TimeoutStartSec=3600
TimeoutStopSec=60
[Install]
WantedBy=multi-user.target
sudo systemctl daemon-reload
Create systemd timer for automated verification
Schedule regular backup verification runs with configurable frequency and randomized timing.
[Unit]
Description=Backup Verification Timer
Requires=backup-verify.service
[Timer]
# Run daily at 2:00 AM with randomized delay up to 2 hours
OnCalendar=*-*-* 02:00:00
RandomizedDelaySec=7200
# Also run on system boot (after 10 minutes)
OnBootSec=10min
# Ensure service runs even if previous run was missed
Persistent=true
[Install]
WantedBy=timers.target
Enable and start the verification service
Activate the systemd timer to begin automated backup verification.
sudo systemctl enable backup-verify.timer
sudo systemctl start backup-verify.timer
sudo systemctl status backup-verify.timer
Configure email notifications
Install and configure mail transfer agent
Set up Postfix for sending verification alerts and reports.
sudo apt install -y postfix mailutils
sudo dpkg-reconfigure postfix
Configure Postfix for relay
Configure Postfix to relay mail through your organization's SMTP server.
# Add these lines to main.cf
relayhost = [smtp.example.com]:587
smtp_sasl_auth_enable = yes
smtp_sasl_password_maps = hash:/etc/postfix/sasl_passwd
smtp_sasl_security_options = noanonymous
smtp_tls_security_level = encrypt
smtp_tls_CAfile = /etc/ssl/certs/ca-certificates.crt
[smtp.example.com]:587 username:password
sudo chmod 600 /etc/postfix/sasl_passwd
sudo postmap /etc/postfix/sasl_passwd
sudo systemctl restart postfix
Verify your setup
# Test individual verification scripts
sudo -u backup-verify /opt/backup-verify/scripts/verify-postgresql.sh
sudo -u backup-verify /opt/backup-verify/scripts/verify-mysql.sh
sudo -u backup-verify /opt/backup-verify/scripts/verify-filesystem.sh

# Test master verification
sudo -u backup-verify /opt/backup-verify/scripts/master-verify.sh

# Check systemd timer status
sudo systemctl status backup-verify.timer
sudo systemctl list-timers backup-verify.timer

# Manually trigger verification
sudo systemctl start backup-verify.service
sudo journalctl -u backup-verify.service -f

# Check generated reports
ls -la /var/log/backup-verify/
cat /var/log/backup-verify/master-report-*.json | jq '.'
Integration with monitoring
Export metrics for Prometheus
Create a metrics exporter script that converts verification results to Prometheus format.
#!/bin/bash
# Export backup verification metrics for Prometheus
#
# Converts the newest master report in LOG_DIR into a node_exporter textfile
# collector file. Exits 1 when no report exists.
set -euo pipefail

LOG_DIR="/var/log/backup-verify"
METRICS_FILE="/var/lib/prometheus/node-exporter/backup_verify.prom"
METRICS_DIR=$(dirname "$METRICS_FILE")

mkdir -p "$METRICS_DIR"

# Find latest report (newest modification time wins)
latest_report=$(find "$LOG_DIR" -name "master-report-*.json" -type f -printf '%T@ %p\n' | sort -n | tail -1 | cut -d' ' -f2-)

if [ -z "$latest_report" ] || [ ! -f "$latest_report" ]; then
  echo "No report files found" >&2
  exit 1
fi

# Build the file next to its final location and rename it into place, so the
# collector never reads a half-written file.
tmp_file=$(mktemp "$METRICS_DIR/backup_verify.prom.XXXXXX")
trap 'rm -f -- "$tmp_file"' EXIT

# Generate Prometheus metrics. The heredoc is unquoted on purpose so that
# $(date +%s) expands; HELP/TYPE lines must start with "#".
cat > "$tmp_file" <<EOF
# HELP backup_verify_last_run_timestamp Unix timestamp of last verification run
# TYPE backup_verify_last_run_timestamp gauge
backup_verify_last_run_timestamp $(date +%s)
# HELP backup_verify_script_success Whether verification script succeeded (1) or failed (0)
# TYPE backup_verify_script_success gauge
EOF

# Parse JSON report and add metrics
jq -r '.results[] | "backup_verify_script_success{script=\"" + .script + "\"} " + (if .exit_code == 0 then "1" else "0" end)' "$latest_report" >> "$tmp_file"

# mktemp creates the file with mode 0600; the collector usually runs as
# another user and must be able to read it.
chmod 644 "$tmp_file"
mv -f -- "$tmp_file" "$METRICS_FILE"

echo "Metrics exported to $METRICS_FILE"
sudo chmod +x /opt/backup-verify/scripts/export-metrics.sh
Recovery testing automation
Create disaster recovery test script
Automate full disaster recovery scenarios to validate your entire backup and restore process.
#!/bin/bash
# Disaster Recovery Test Script
#
# Restores the most recent PostgreSQL and MySQL dumps into throw-away
# databases and reports table counts and restore duration.
set -euo pipefail

LOG_FILE="/var/log/backup-verify/dr-test-$(date +%Y%m%d_%H%M%S).log"
TEST_ENV_PREFIX="dr_test_$(date +%s)"
CONFIG_FILE="/opt/backup-verify/config/verify.conf"

# Load configuration
if [ -f "$CONFIG_FILE" ]; then
  # shellcheck disable=SC1090 -- path is only known at run time
  source "$CONFIG_FILE"
fi

# Connection defaults, matching the individual verification scripts. Without
# them a missing config file makes the first use of PG_*/MY_* abort the script
# with "unbound variable" under `set -u`.
PG_HOST="${PG_HOST:-localhost}"
PG_PORT="${PG_PORT:-5432}"
PG_USER="${PG_USER:-postgres}"
PG_PASSWORD="${PG_PASSWORD:-}"
MY_HOST="${MY_HOST:-localhost}"
MY_PORT="${MY_PORT:-3306}"
MY_USER="${MY_USER:-root}"
MY_PASSWORD="${MY_PASSWORD:-}"
# Timestamped logger for the DR test: prints the line and appends it to $LOG_FILE.
# Arguments: $1 - message text
log() {
  local ts
  ts=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s - %s\n' "$ts" "$1" | tee -a "$LOG_FILE"
}
# Restore one PostgreSQL dump into a uniquely named database, time the
# restore, report what arrived, and always drop the database afterwards.
#
# Globals:   TEST_ENV_PREFIX, PG_HOST, PG_PORT, PG_USER, PG_PASSWORD (read)
# Arguments: $1 - path to a plain-SQL dump, optionally gzip-compressed (*.gz)
# Returns:   0 when the restore and the follow-up queries succeed, 1 otherwise
test_full_postgresql_recovery() {
  local backup_file="$1"
  local test_db="${TEST_ENV_PREFIX}_pg_recovery"
  local -a conn=(-h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER")

  log "Testing full PostgreSQL recovery from: $(basename "$backup_file")"

  # Create test database
  if ! PGPASSWORD="$PG_PASSWORD" createdb "${conn[@]}" "$test_db"; then
    log "ERROR: Could not create test database: $test_db"
    return 1
  fi

  # Measure restore time. ON_ERROR_STOP makes psql exit non-zero on the first
  # SQL error instead of reporting success for a partial restore.
  local start_time end_time restore_duration
  local failed=0
  start_time=$(date +%s)
  if [[ "$backup_file" == *.gz ]]; then
    gunzip -c "$backup_file" \
      | PGPASSWORD="$PG_PASSWORD" psql "${conn[@]}" -d "$test_db" -q -v ON_ERROR_STOP=1 \
      || failed=1
  else
    PGPASSWORD="$PG_PASSWORD" psql "${conn[@]}" -d "$test_db" -q -v ON_ERROR_STOP=1 -f "$backup_file" \
      || failed=1
  fi
  end_time=$(date +%s)
  restore_duration=$((end_time - start_time))

  # Validate data integrity
  local table_count="" row_count=""
  if (( failed == 0 )); then
    table_count=$(PGPASSWORD="$PG_PASSWORD" psql "${conn[@]}" -d "$test_db" -t \
      -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';") || failed=1
    table_count=${table_count//[[:space:]]/}
    # Row figures come from statistics counters and are approximate; an empty
    # result is reported as 0.
    row_count=$(PGPASSWORD="$PG_PASSWORD" psql "${conn[@]}" -d "$test_db" -t \
      -c "SELECT SUM(n_tup_ins + n_tup_upd) FROM pg_stat_user_tables;") || row_count=""
    row_count=${row_count//[[:space:]]/}
    row_count=${row_count:-0}
  fi

  # Cleanup runs on every path so a failed restore does not leak a database.
  if ! PGPASSWORD="$PG_PASSWORD" dropdb "${conn[@]}" --if-exists "$test_db"; then
    log "WARNING: Could not drop test database: $test_db"
  fi

  if (( failed != 0 )); then
    log "ERROR: PostgreSQL recovery failed after ${restore_duration}s"
    return 1
  fi

  log "PostgreSQL recovery completed: $table_count tables, ~$row_count rows, ${restore_duration}s restore time"
  return 0
}
# Restore one MySQL dump into a uniquely named database, time the restore,
# report what arrived, and always drop the database afterwards.
#
# Globals:   TEST_ENV_PREFIX, MY_HOST, MY_PORT, MY_USER, MY_PASSWORD (read)
# Arguments: $1 - path to a SQL dump, optionally gzip-compressed (*.gz)
# Returns:   0 when the restore and the follow-up queries succeed, 1 otherwise
test_full_mysql_recovery() {
  local backup_file="$1"
  local test_db="${TEST_ENV_PREFIX}_my_recovery"
  local -a my_args=(-h "$MY_HOST" -P "$MY_PORT" -u "$MY_USER" -p"$MY_PASSWORD")

  log "Testing full MySQL recovery from: $(basename "$backup_file")"

  # Create test database. The name is an identifier and is wrapped in
  # backticks; the previous "\$test_db\;" sent literal text to the server.
  if ! mysql "${my_args[@]}" -e "CREATE DATABASE IF NOT EXISTS \`$test_db\`;"; then
    log "ERROR: Could not create test database: $test_db"
    return 1
  fi

  # Measure restore time
  local start_time end_time restore_duration
  local failed=0
  start_time=$(date +%s)
  if [[ "$backup_file" == *.gz ]]; then
    gunzip -c "$backup_file" | mysql "${my_args[@]}" "$test_db" || failed=1
  else
    mysql "${my_args[@]}" "$test_db" < "$backup_file" || failed=1
  fi
  end_time=$(date +%s)
  restore_duration=$((end_time - start_time))

  # Validate data integrity
  local table_count="" size_mb=""
  if (( failed == 0 )); then
    table_count=$(mysql "${my_args[@]}" -s -N \
      -e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '$test_db';") || failed=1
    size_mb=$(mysql "${my_args[@]}" -s -N \
      -e "SELECT ROUND(SUM(data_length + index_length) / 1024 / 1024, 2) FROM information_schema.tables WHERE table_schema = '$test_db';") || size_mb=""
    size_mb=${size_mb%%$'\n'*}   # keep the first line only
  fi

  # Cleanup runs on every path so a failed restore does not leak a database.
  if ! mysql "${my_args[@]}" -e "DROP DATABASE IF EXISTS \`$test_db\`;"; then
    log "WARNING: Could not drop test database: $test_db"
  fi

  if (( failed != 0 )); then
    log "ERROR: MySQL recovery failed after ${restore_duration}s"
    return 1
  fi

  log "MySQL recovery completed: $table_count tables, ${size_mb}MB data, ${restore_duration}s restore time"
  return 0
}
# Run one recovery test per database engine against a dump from the last
# 24 hours, if one exists. A missing dump is only a warning.
main() {
  log "Starting disaster recovery test"

  # The name patterns carry their wildcards again ("*.sql*" also matches
  # gzip-compressed dumps, which the recovery functions handle). -print -quit
  # stops at the first match; a missing directory simply yields no candidate.
  local pg_backup my_backup

  # Test PostgreSQL recovery
  pg_backup=$(find "/backups/postgresql" -type f -name "*.sql*" -mtime -1 -print -quit 2>/dev/null) || true
  if [[ -n "$pg_backup" ]]; then
    test_full_postgresql_recovery "$pg_backup"
  else
    log "WARNING: No recent PostgreSQL backup found for testing"
  fi

  # Test MySQL recovery
  my_backup=$(find "/backups/mysql" -type f -name "*.sql*" -mtime -1 -print -quit 2>/dev/null) || true
  if [[ -n "$my_backup" ]]; then
    test_full_mysql_recovery "$my_backup"
  else
    log "WARNING: No recent MySQL backup found for testing"
  fi

  log "Disaster recovery test completed"
}
main "$@"
sudo chmod +x /opt/backup-verify/scripts/disaster-recovery-test.sh
Schedule weekly disaster recovery tests
Create a separate systemd timer for comprehensive disaster recovery testing.
[Unit]
Description=Backup Disaster Recovery Test
After=network.target postgresql.service mysql.service
Wants=postgresql.service mysql.service
[Service]
Type=oneshot
User=backup-verify
Group=backup-verify
WorkingDirectory=/opt/backup-verify
ExecStart=/opt/backup-verify/scripts/disaster-recovery-test.sh
StandardOutput=journal
StandardError=journal
SyslogIdentifier=backup-dr-test
# Extended timeout for large restores
TimeoutStartSec=7200
# Security settings
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/opt/backup-verify /var/log/backup-verify /tmp
PrivateTmp=true
[Unit]
Description=Weekly Backup Disaster Recovery Test
Requires=backup-dr-test.service
[Timer]
# Run weekly on Sunday at 3:00 AM
OnCalendar=Sun *-*-* 03:00:00
RandomizedDelaySec=3600
Persistent=true
[Install]
WantedBy=timers.target
sudo systemctl daemon-reload
sudo systemctl enable backup-dr-test.timer
sudo systemctl start backup-dr-test.timer
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Permission denied accessing backup files | backup-verify user lacks read permissions | sudo chown -R backup-verify:backup-verify /backups or add user to backup group |
| Database connection refused during verification | Incorrect credentials or database not running | Check config file /opt/backup-verify/config/verify.conf and test connection manually |
| Verification script exits with timeout | Large backup files taking too long to restore | Increase TimeoutStartSec in systemd service or optimize backup compression |
| Email alerts not being sent | Postfix not configured or mail command missing | Install mail utilities and configure SMTP relay in /etc/postfix/main.cf |
| Systemd timer not running | Timer not enabled or service dependency missing | sudo systemctl enable backup-verify.timer and check dependencies |
| Verification passes but restore actually fails | Test database too small or incomplete validation | Add application-specific validation queries to verification scripts |
Next steps
- Monitor backup health with Prometheus and Grafana dashboards
- Extend verification to Redis and other NoSQL databases
- Centralize backup verification logs for better visibility
- Deploy backup verification in Kubernetes environments
- Generate compliance reports for backup validation audits
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Installer for the backup verification framework.
set -euo pipefail
# Colors for output
# (backslash escape sequences; the print_* helpers emit them with `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
INSTALL_DIR="/opt/backup-verify"
LOG_DIR="/var/log/backup-verify"
SERVICE_USER="backup-verify"
# Backup locations created (best effort) by create_user_and_directories
BACKUP_DIRS=("/backups/postgresql" "/backups/mysql" "/backups/filesystem")
# Progress counter
# STEP is advanced by print_step; TOTAL_STEPS is the denominator it prints.
STEP=1
TOTAL_STEPS=8
# Print a "[current/total]" progress prefix followed by the message, then
# advance the global step counter.
# Arguments: $1 - step description
print_step() {
  local label="$1"
  local prefix="${BLUE}[$STEP/$TOTAL_STEPS]${NC}"
  echo -e "${prefix} ${label}"
  ((STEP++))
}
# Print a green check mark followed by the message.
# Arguments: $1 - message text
print_success() {
  local text="$1"
  echo -e "${GREEN}✓${NC} ${text}"
}
# Print a yellow warning sign followed by the message.
# Arguments: $1 - message text
print_warning() {
  local text="$1"
  echo -e "${YELLOW}⚠${NC} ${text}"
}
# Print a red cross followed by the message (written to stdout).
# Arguments: $1 - message text
print_error() {
  local text="$1"
  echo -e "${RED}✗${NC} ${text}"
}
# Roll back a failed installation: stop and disable the timer, remove the unit
# files, the service account and the install/log directories. Does nothing
# when the status that triggered it is 0.
#
# Globals: SERVICE_USER, INSTALL_DIR, LOG_DIR (read)
# NOTE(review): ERR traps are not inherited by shell functions unless
# `set -E` (errtrace) is active, so a failure inside one of the installer's
# functions may not reach this handler - confirm the intended behavior.
cleanup() {
  if [[ $? -ne 0 ]]; then
    print_error "Installation failed. Cleaning up..."
    systemctl stop backup-verify.timer 2>/dev/null || true
    systemctl disable backup-verify.timer 2>/dev/null || true
    rm -rf -- /etc/systemd/system/backup-verify.* 2>/dev/null || true
    userdel -r "$SERVICE_USER" 2>/dev/null || true
    # ${VAR:?} aborts the expansion if a path is ever empty or unset, so a
    # misconfiguration cannot turn this into an unintended recursive delete.
    rm -rf -- "${INSTALL_DIR:?}" "${LOG_DIR:?}" 2>/dev/null || true
    print_error "Cleanup completed"
  fi
}

trap cleanup ERR
# Abort the installer unless it runs as root on a system that has systemctl.
# Exits: 1 when either requirement is not met
check_prerequisites() {
  if (( EUID != 0 )); then
    print_error "This script must be run as root"
    exit 1
  fi

  command -v systemctl >/dev/null 2>&1 || {
    print_error "systemd is required but not found"
    exit 1
  }
}
# Identify the distribution from /etc/os-release and set the package-manager
# globals used by install_packages.
#
# Globals written: PKG_MGR, PKG_UPDATE, PKG_INSTALL, POSTGRES_CLIENT,
#                  MYSQL_CLIENT, MAIL_PKG (plus everything os-release defines)
# Exits: 1 when os-release is missing or the distribution is not supported
detect_distro() {
  [ -f /etc/os-release ] || {
    print_error "Cannot detect Linux distribution"
    exit 1
  }

  source /etc/os-release

  case "$ID" in
    ubuntu|debian)
      PKG_MGR="apt"
      PKG_UPDATE="$PKG_MGR update"
      PKG_INSTALL="$PKG_MGR install -y"
      POSTGRES_CLIENT="postgresql-client"
      MYSQL_CLIENT="mysql-client"
      MAIL_PKG="mailutils"
      ;;
    almalinux|rocky|centos|rhel|ol|fedora|amzn)
      # The RPM-based families share package names; only the tool differs.
      if [ "$ID" = "amzn" ]; then
        PKG_MGR="yum"
      else
        PKG_MGR="dnf"
      fi
      PKG_UPDATE="$PKG_MGR update -y"
      PKG_INSTALL="$PKG_MGR install -y"
      POSTGRES_CLIENT="postgresql"
      MYSQL_CLIENT="mysql"
      MAIL_PKG="mailx"
      ;;
    *)
      print_error "Unsupported distribution: $ID"
      exit 1
      ;;
  esac

  print_success "Detected distribution: $PRETTY_NAME"
}
# Refresh package metadata and install the client tools for the detected
# distribution.
# Globals: PKG_UPDATE, PKG_INSTALL, POSTGRES_CLIENT, MYSQL_CLIENT, MAIL_PKG (read)
install_packages() {
  local common_tools="rsync gzip pigz pv jq bc"

  print_step "Installing required packages..."

  # The command strings are expanded unquoted on purpose: each one holds a
  # command plus its arguments and relies on word splitting.
  # shellcheck disable=SC2086
  $PKG_UPDATE
  # shellcheck disable=SC2086
  $PKG_INSTALL $POSTGRES_CLIENT $MYSQL_CLIENT $common_tools $MAIL_PKG

  print_success "Packages installed successfully"
}
# Create the service account (if absent) and the install, log and backup
# directory trees, then set ownership and modes.
# Globals: SERVICE_USER, INSTALL_DIR, LOG_DIR, BACKUP_DIRS (read)
create_user_and_directories() {
  local sub

  print_step "Creating service user and directory structure..."

  # Only add the account when it does not exist yet.
  id "$SERVICE_USER" >/dev/null 2>&1 || {
    useradd -r -s /bin/bash -d "$INSTALL_DIR" "$SERVICE_USER"
    print_success "Created service user: $SERVICE_USER"
  }

  for sub in scripts config temp; do
    mkdir -p "$INSTALL_DIR/$sub"
  done
  mkdir -p "$LOG_DIR"
  # Backup locations are created best-effort; failures are ignored.
  mkdir -p "${BACKUP_DIRS[@]}" 2>/dev/null || true

  chown -R "$SERVICE_USER:$SERVICE_USER" "$INSTALL_DIR"
  chown "$SERVICE_USER:$SERVICE_USER" "$LOG_DIR"

  chmod 755 "$INSTALL_DIR" "$LOG_DIR"
  for sub in scripts config temp; do
    chmod 750 "$INSTALL_DIR/$sub"
  done

  print_success "Directory structure created"
}
# Write the PostgreSQL verification script into $INSTALL_DIR/scripts and make
# it executable for the service account.
#
# Fixes in the generated script compared with the previous template:
#  * counters use plain assignment; ((total_count++)) returns status 1 while
#    the counter is 0 and killed the script on the first backup under `set -e`
#  * a stale scratch database is dropped before each restore, so a second
#    backup in the same run no longer restores on top of the first
#  * createdb/restore failures are checked explicitly, because verify_backup
#    runs inside an `if`, where `set -e` is ignored
#
# Globals: INSTALL_DIR, SERVICE_USER (read)
create_postgresql_script() {
  print_step "Creating PostgreSQL backup verification script..."

  # Quoted delimiter: the template is written out literally, nothing expands here.
  cat > "$INSTALL_DIR/scripts/verify-postgresql.sh" << 'EOF'
#!/usr/bin/env bash
set -euo pipefail

BACKUP_DIR="/backups/postgresql"
TEST_DB="backup_verify_test"
LOG_FILE="/var/log/backup-verify/postgresql-$(date +%Y%m%d_%H%M%S).log"
MAX_AGE_HOURS=48

PG_HOST="${PG_HOST:-localhost}"
PG_PORT="${PG_PORT:-5432}"
PG_USER="${PG_USER:-postgres}"
PG_PASSWORD="${PG_PASSWORD:-}"

log() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

cleanup() {
    log "Cleaning up test database"
    PGPASSWORD="$PG_PASSWORD" dropdb -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" --if-exists "$TEST_DB" 2>/dev/null || true
    rm -rf /tmp/pg_verify_*
}

trap cleanup EXIT

verify_backup() {
    local backup_file="$1"
    local backup_name
    backup_name=$(basename "$backup_file")

    log "Verifying backup: $backup_name"

    local file_age=$(( $(date +%s) - $(stat -c %Y "$backup_file") ))
    local max_age=$(( MAX_AGE_HOURS * 3600 ))

    if (( file_age > max_age )); then
        log "ERROR: Backup file is older than $MAX_AGE_HOURS hours"
        return 1
    fi

    if [[ "$backup_file" == *.gz ]] && ! gzip -t "$backup_file"; then
        log "ERROR: Backup file is corrupted"
        return 1
    fi

    # Start from an empty scratch database for every backup.
    PGPASSWORD="$PG_PASSWORD" dropdb -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" --if-exists "$TEST_DB" 2>/dev/null || true
    if ! PGPASSWORD="$PG_PASSWORD" createdb -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" "$TEST_DB"; then
        log "ERROR: Could not create test database: $TEST_DB"
        return 1
    fi

    # ON_ERROR_STOP makes psql exit non-zero on the first SQL error.
    if [[ "$backup_file" == *.gz ]]; then
        if ! gunzip -c "$backup_file" | PGPASSWORD="$PG_PASSWORD" psql -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" -d "$TEST_DB" -q -v ON_ERROR_STOP=1; then
            log "ERROR: Restore failed for $backup_name"
            return 1
        fi
    else
        if ! PGPASSWORD="$PG_PASSWORD" psql -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" -d "$TEST_DB" -q -v ON_ERROR_STOP=1 -f "$backup_file"; then
            log "ERROR: Restore failed for $backup_name"
            return 1
        fi
    fi

    local table_count
    if ! table_count=$(PGPASSWORD="$PG_PASSWORD" psql -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" -d "$TEST_DB" -t -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';"); then
        log "ERROR: Could not query restored database"
        return 1
    fi
    table_count=${table_count//[[:space:]]/}

    if ! [[ "$table_count" =~ ^[0-9]+$ ]] || (( table_count == 0 )); then
        log "WARNING: No tables found in restored database"
        return 1
    fi

    log "SUCCESS: Backup verified - $table_count tables restored"
    return 0
}

main() {
    log "Starting PostgreSQL backup verification"

    [[ -d "$BACKUP_DIR" ]] || { log "ERROR: Backup directory not found: $BACKUP_DIR"; exit 1; }

    local success_count=0 total_count=0
    local backup_file

    while IFS= read -r -d '' backup_file; do
        total_count=$((total_count + 1))
        if verify_backup "$backup_file"; then
            success_count=$((success_count + 1))
        fi
    done < <(find "$BACKUP_DIR" -name "*.sql*" -type f -mtime -2 -print0 2>/dev/null)

    log "Verification completed: $success_count/$total_count backups verified successfully"
}

main "$@"
EOF

  chmod 750 "$INSTALL_DIR/scripts/verify-postgresql.sh"
  chown "$SERVICE_USER:$SERVICE_USER" "$INSTALL_DIR/scripts/verify-postgresql.sh"
  print_success "PostgreSQL verification script created"
}
# Write the MySQL backup verification script to
# $INSTALL_DIR/scripts/verify-mysql.sh, make it executable (mode 750) and hand
# it to $SERVICE_USER.
# Globals: INSTALL_DIR, SERVICE_USER (read)
create_mysql_script() {
  local target="$INSTALL_DIR/scripts/verify-mysql.sh"

  print_step "Creating MySQL backup verification script..."
  # Quoted delimiter: the script text below is written verbatim, unexpanded.
  cat > "$target" << 'EOF'
#!/usr/bin/env bash
#
# Verify recent MySQL dumps by restoring each one into a scratch database.
#
# Environment (all optional):
#   MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWORD  connection settings
#   MYSQL_BACKUP_DIR        directory holding the dumps (default /backups/mysql)
#   BACKUP_VERIFY_LOG_DIR   log directory (default /var/log/backup-verify)
#
# Exit status: 0 if at least one recent backup exists and all verified,
#              non-zero otherwise.
set -euo pipefail

BACKUP_DIR="${MYSQL_BACKUP_DIR:-/backups/mysql}"
TEST_DB="backup_verify_test"
LOG_FILE="${BACKUP_VERIFY_LOG_DIR:-/var/log/backup-verify}/mysql-$(date +%Y%m%d_%H%M%S).log"
MAX_AGE_HOURS=48

MYSQL_HOST="${MYSQL_HOST:-localhost}"
MYSQL_PORT="${MYSQL_PORT:-3306}"
MYSQL_USER="${MYSQL_USER:-root}"
MYSQL_PASSWORD="${MYSQL_PASSWORD:-}"

# Connection arguments shared by every client call. The password option is
# added only when a password is configured: a bare "-p" (what -p"" expands to)
# makes the client prompt for one instead of connecting without a password.
MYSQL_ARGS=(-h "$MYSQL_HOST" -P "$MYSQL_PORT" -u "$MYSQL_USER")
if [[ -n "$MYSQL_PASSWORD" ]]; then
  MYSQL_ARGS+=("--password=$MYSQL_PASSWORD")
fi

# Print a timestamped message and append it to the log file.
log() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# Run the mysql client with the shared connection arguments.
mysql_cmd() {
  mysql "${MYSQL_ARGS[@]}" "$@"
}

# EXIT trap: drop the scratch database. Best effort, so errors are ignored.
cleanup() {
  log "Cleaning up test database"
  mysql_cmd -e "DROP DATABASE IF EXISTS $TEST_DB;" 2>/dev/null || true
}
trap cleanup EXIT

# Restore one backup into the scratch database and sanity-check the result.
# Every step is checked explicitly because this function is called from an
# `if`, where `set -e` is suspended.
# Arguments: $1 - path to a SQL dump, optionally gzip-compressed (*.gz)
# Returns:   0 if the dump restored and produced at least one table, else 1
verify_backup() {
  local backup_file="$1"
  local backup_name="${backup_file##*/}"
  local file_mtime file_age max_age table_count

  log "Verifying backup: $backup_name"

  if ! file_mtime=$(stat -c %Y -- "$backup_file"); then
    log "ERROR: Cannot read modification time of $backup_name"
    return 1
  fi
  file_age=$(( $(date +%s) - file_mtime ))
  max_age=$(( MAX_AGE_HOURS * 3600 ))
  if (( file_age > max_age )); then
    log "ERROR: Backup file is older than $MAX_AGE_HOURS hours"
    return 1
  fi

  if [[ "$backup_file" == *.gz ]] && ! gzip -t -- "$backup_file"; then
    log "ERROR: Backup file is corrupted"
    return 1
  fi

  # Recreate the scratch database for every backup; tables left over from a
  # previous restore would otherwise make a broken backup look healthy.
  if ! mysql_cmd -e "DROP DATABASE IF EXISTS $TEST_DB; CREATE DATABASE $TEST_DB;"; then
    log "ERROR: Could not create test database $TEST_DB"
    return 1
  fi

  if [[ "$backup_file" == *.gz ]]; then
    if ! gunzip -c -- "$backup_file" | mysql_cmd "$TEST_DB"; then
      log "ERROR: Restore of $backup_name failed"
      return 1
    fi
  else
    if ! mysql_cmd "$TEST_DB" < "$backup_file"; then
      log "ERROR: Restore of $backup_name failed"
      return 1
    fi
  fi

  if ! table_count=$(mysql_cmd "$TEST_DB" -sN \
    -e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '$TEST_DB';"); then
    log "ERROR: Could not query restored database"
    return 1
  fi
  table_count="${table_count//[[:space:]]/}"
  # Guard against empty or non-numeric output before comparing numerically.
  if [[ ! "$table_count" =~ ^[0-9]+$ ]]; then
    log "ERROR: Unexpected table count from restored database: '$table_count'"
    return 1
  fi
  if (( table_count == 0 )); then
    log "WARNING: No tables found in restored database"
    return 1
  fi

  log "SUCCESS: Backup verified - $table_count tables restored"
  return 0
}

# Verify every backup modified within the last two days.
main() {
  log "Starting MySQL backup verification"
  [[ -d "$BACKUP_DIR" ]] || { log "ERROR: Backup directory not found: $BACKUP_DIR"; exit 1; }

  local success_count=0 total_count=0 backup_file
  # The file list arrives on fd 3 so the mysql client cannot consume it
  # through stdin.
  while IFS= read -r -d '' -u 3 backup_file; do
    # Plain assignment instead of ((n++)): the latter returns status 1 when
    # the old value is 0, which aborts the script under `set -e`.
    total_count=$((total_count + 1))
    if verify_backup "$backup_file"; then
      success_count=$((success_count + 1))
    fi
  done 3< <(find "$BACKUP_DIR" -name "*.sql*" -type f -mtime -2 -print0 2>/dev/null)

  log "Verification completed: $success_count/$total_count backups verified successfully"

  # Report failure through the exit status so the caller can raise an alert.
  if (( total_count == 0 )); then
    log "ERROR: No backup files from the last 2 days found in $BACKUP_DIR"
    return 1
  fi
  if (( success_count < total_count )); then
    return 1
  fi
  return 0
}

main "$@"
EOF
  chmod 750 "$target"
  chown "$SERVICE_USER:$SERVICE_USER" "$target"
  print_success "MySQL verification script created"
}
# Write the orchestrator script to $INSTALL_DIR/scripts/backup-verify.sh, make
# it executable (mode 750) and hand it to $SERVICE_USER.
# Globals: INSTALL_DIR, SERVICE_USER (read)
create_main_script() {
  local target="$INSTALL_DIR/scripts/backup-verify.sh"

  print_step "Creating main verification orchestrator..."
  # Quoted delimiter: the script text below is written verbatim, unexpanded.
  cat > "$target" << 'EOF'
#!/usr/bin/env bash
#
# Run every backup verification script and e-mail an alert if any of them fail.
#
# Environment (all optional):
#   EMAIL_RECIPIENT         alert recipient (default root@localhost)
#   BACKUP_VERIFY_LOG_DIR   log directory (default /var/log/backup-verify)
#
# Exit status: 0 if every verification passed, 1 otherwise.
set -euo pipefail

# The verification scripts are installed next to this file; resolving the
# directory at run time keeps this working for any installation prefix.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
LOG_DIR="${BACKUP_VERIFY_LOG_DIR:-/var/log/backup-verify}"
LOG_FILE="$LOG_DIR/main-$(date +%Y%m%d_%H%M%S).log"
EMAIL_RECIPIENT="${EMAIL_RECIPIENT:-root@localhost}"

# Print a timestamped message and append it to the log file.
log() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# Run one verification script and log the outcome.
# Arguments: $1 - script file name inside $SCRIPT_DIR
#            $2 - human-readable name used in log messages
# Returns:   0 if the script ran and succeeded; 1 if it failed, is missing or
#            is not executable
run_verification() {
  local script="$1"
  local name="$2"

  log "Starting $name verification"
  if [[ ! -x "$SCRIPT_DIR/$script" ]]; then
    log "WARNING: $name verification script not found or not executable"
    return 1
  fi
  if "$SCRIPT_DIR/$script"; then
    log "SUCCESS: $name verification completed"
    return 0
  fi
  log "ERROR: $name verification failed"
  return 1
}

main() {
  log "Starting backup verification suite"

  local total_success=0 total_tests=0
  local script_info script name
  for script_info in "verify-postgresql.sh:PostgreSQL" "verify-mysql.sh:MySQL"; do
    IFS=':' read -r script name <<< "$script_info"
    # Plain assignment instead of ((n++)): the latter returns status 1 when
    # the old value is 0, which aborts the script under `set -e`.
    total_tests=$((total_tests + 1))
    if run_verification "$script" "$name"; then
      total_success=$((total_success + 1))
    fi
  done

  log "Verification suite completed: $total_success/$total_tests tests passed"

  if (( total_success < total_tests )); then
    # Sending the alert is best effort, but a delivery failure is logged.
    if ! echo "Backup verification failures detected. Check logs in $LOG_DIR/" \
      | mail -s "Backup Verification Alert" "$EMAIL_RECIPIENT" 2>/dev/null; then
      log "WARNING: Could not send alert e-mail to $EMAIL_RECIPIENT"
    fi
    # Non-zero exit lets systemd mark the run as failed.
    return 1
  fi
  return 0
}

main "$@"
EOF
  chmod 750 "$target"
  chown "$SERVICE_USER:$SERVICE_USER" "$target"
  print_success "Main verification script created"
}
create_systemd_service() {
print_step "Creating systemd service and timer..."
cat > /etc/systemd/system/backup-verify.service << EOF
[Unit]
Description=Backup Verification Service
After=network.target
[Service]
Type=oneshot
User=$SERVICE_USER
Group=$SERVICE_USER
ExecStart=$INSTALL_DIR/scripts/backup-verify.sh
WorkingDirectory=$INSTALL_DIR
StandardOutput=journal
StandardError=journal
EOF
cat > /etc/systemd/system/backup-verify.timer << EOF
[Unit]
Description=Run backup
Review the script before running. Execute with: bash install.sh