Learn to implement production-grade backup and restore procedures for Redis Sentinel clusters with automated scheduling, point-in-time recovery, and comprehensive monitoring to ensure data durability and business continuity.
Prerequisites
- Redis Sentinel cluster running
- Root or sudo access
- At least 2GB free disk space for backups
- Email service configured for alerts
- Basic knowledge of Redis architecture
What this solves
Redis Sentinel clusters provide high availability but require robust backup strategies to protect against data loss, corruption, and disaster scenarios. This tutorial implements automated backup procedures with point-in-time recovery capabilities and monitoring to ensure your Redis Sentinel cluster data remains protected and recoverable in production environments.
Step-by-step configuration
Install required backup tools
Install Redis tools and backup utilities needed for cluster data management.
sudo apt update
sudo apt install -y redis-tools awscli s3cmd gzip pigz cron
Create backup directory structure
Set up organized directories for local backups with proper permissions and ownership.
sudo mkdir -p /var/backups/redis/{daily,weekly,monthly,snapshots}
sudo mkdir -p /var/backups/redis/logs
sudo useradd -r -s /bin/false redis-backup
sudo chown -R redis-backup:redis-backup /var/backups/redis
sudo chmod 750 /var/backups/redis
sudo chmod 755 /var/backups/redis/logs
Configure Redis nodes for backup
Enable RDB snapshots and configure backup-friendly settings on all Redis nodes.
# Enable RDB snapshots
save 900 1
save 300 10
save 60 10000
# RDB configuration
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
dir /var/lib/redis
# Enable AOF for point-in-time recovery
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
# Backup-friendly settings
stop-writes-on-bgsave-error yes
rdb-save-incremental-fsync yes
Restart Redis services
Apply the new configuration by restarting all Redis nodes in the cluster.
sudo systemctl restart redis-server
sudo systemctl status redis-server
Create backup script for Sentinel cluster
Develop a comprehensive backup script that handles multiple Redis nodes and Sentinel configuration.
#!/bin/bash
# Redis Sentinel Backup Script
# Supports multiple Redis nodes and Sentinel instances
set -euo pipefail

# Configuration
BACKUP_DIR="/var/backups/redis"
LOG_FILE="$BACKUP_DIR/logs/backup-$(date +%Y%m%d-%H%M%S).log"
RETENTION_DAYS=30
S3_BUCKET="your-redis-backups"
NOTIFY_EMAIL="admin@example.com"

# Redis Sentinel configuration
SENTINEL_HOSTS=("127.0.0.1:26379" "127.0.0.1:26380" "127.0.0.1:26381")
MASTER_NAME="mymaster"

# Log a timestamped message to stdout and append it to the log file.
log() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Log an error, send an alert email (best effort), and abort the backup.
error_exit() {
  log "ERROR: $1"
  # '|| true' so a missing/broken MTA cannot mask the original failure
  echo "Redis backup failed: $1" | mail -s "Redis Backup Alert" "$NOTIFY_EMAIL" || true
  exit 1
}
# Get master info from Sentinel: query one Sentinel for the current
# master address of $MASTER_NAME and print "host:port".
# Retries up to 3 times before returning failure.
get_master_info() {
  local sentinel_host="$1"
  local host port
  for attempt in {1..3}; do
    # FIX: strip ':port' / 'host:' with "%:*" and "#*:"; the original
    # "%:" / "#:" patterns match nothing and leave the argument intact,
    # so redis-cli was called with "-h host:port".
    if master_info=$(redis-cli -h "${sentinel_host%:*}" -p "${sentinel_host#*:}" \
        SENTINEL get-master-addr-by-name "$MASTER_NAME" 2>/dev/null); then
      host=$(echo "$master_info" | head -n1)
      port=$(echo "$master_info" | tail -n1)
      echo "$host:$port"
      return 0
    fi
    log "Attempt $attempt failed for sentinel $sentinel_host"
    sleep 2
  done
  return 1
}
# Get all Redis nodes (master + slaves): prints one "host:port" per line,
# master first, then each replica the master reports in INFO replication.
get_all_nodes() {
  local master_addr
  local nodes=()
  # Ask each Sentinel in turn until one answers with the master address
  local sentinel
  for sentinel in "${SENTINEL_HOSTS[@]}"; do
    if master_addr=$(get_master_info "$sentinel"); then
      nodes+=("$master_addr")
      break
    fi
  done
  [[ ${#nodes[@]} -eq 0 ]] && error_exit "Could not determine master address"
  # Query the master for its replicas; anchor the pattern so only the
  # per-replica lines (slave0:, slave1:, ...) match, consistent with the
  # installer-generated script.
  local master_host="${master_addr%:*}"
  local master_port="${master_addr#*:}"
  local slaves
  if slaves=$(redis-cli -h "$master_host" -p "$master_port" \
      INFO replication | grep "^slave[0-9]" | cut -d: -f1 --complement); then
    local slave_line slave_ip slave_port
    while IFS= read -r slave_line; do
      if [[ -n "$slave_line" ]]; then
        slave_ip=$(echo "$slave_line" | cut -d, -f1 | cut -d= -f2)
        slave_port=$(echo "$slave_line" | cut -d, -f2 | cut -d= -f2)
        nodes+=("$slave_ip:$slave_port")
      fi
    done <<< "$slaves"
  fi
  printf '%s\n' "${nodes[@]}"
}
# Backup single Redis instance: trigger BGSAVE, wait for it to complete,
# then archive the RDB/AOF files plus CONFIG and INFO snapshots.
backup_redis_instance() {
  local host="$1"
  local port="$2"
  local backup_path="$3"
  local timestamp="$4"
  log "Starting backup for Redis instance $host:$port"
  # Create instance backup directory (dots in the IP become dashes)
  local instance_dir="$backup_path/redis-${host//\./-}-$port"
  mkdir -p "$instance_dir"
  # Record the last completed save BEFORE triggering BGSAVE. The original
  # loop sampled LASTSAVE twice *after* BGSAVE and waited for the samples
  # to differ, which spins forever when the save finishes between samples.
  local last_save
  last_save=$(redis-cli -h "$host" -p "$port" LASTSAVE)
  if ! redis-cli -h "$host" -p "$port" BGSAVE; then
    error_exit "Failed to initiate BGSAVE on $host:$port"
  fi
  # Wait until LASTSAVE advances past the pre-BGSAVE value
  while [[ "$(redis-cli -h "$host" -p "$port" LASTSAVE)" == "$last_save" ]]; do
    sleep 2
  done
  # Get Redis data directory
  local redis_dir
  redis_dir=$(redis-cli -h "$host" -p "$port" CONFIG GET dir | tail -n1)
  # Copy RDB file
  if [[ -f "$redis_dir/dump.rdb" ]]; then
    cp "$redis_dir/dump.rdb" "$instance_dir/dump-$timestamp.rdb"
    pigz -9 "$instance_dir/dump-$timestamp.rdb"
    log "RDB backup completed for $host:$port"
  else
    log "WARNING: No RDB file found for $host:$port"
  fi
  # Copy AOF file if enabled
  if [[ -f "$redis_dir/appendonly.aof" ]]; then
    cp "$redis_dir/appendonly.aof" "$instance_dir/appendonly-$timestamp.aof"
    pigz -9 "$instance_dir/appendonly-$timestamp.aof"
    log "AOF backup completed for $host:$port"
  fi
  # Save instance configuration and runtime info alongside the data
  redis-cli -h "$host" -p "$port" CONFIG GET '*' > "$instance_dir/config-$timestamp.txt"
  redis-cli -h "$host" -p "$port" INFO ALL > "$instance_dir/info-$timestamp.txt"
}
# Backup Sentinel configuration: archive the on-disk sentinel*.conf files
# and each reachable Sentinel's live view of the masters and replicas.
backup_sentinel_config() {
  local dest_root="$1"
  local stamp="$2"
  log "Backing up Sentinel configuration"
  local out_dir="$dest_root/sentinel"
  mkdir -p "$out_dir"
  # On-disk Sentinel configuration files
  local conf
  for conf in /etc/redis/sentinel*.conf; do
    if [[ -f "$conf" ]]; then
      cp "$conf" "$out_dir/$(basename "$conf")-$stamp"
    fi
  done
  # Runtime state as seen by each reachable Sentinel
  local endpoint s_host s_port
  for endpoint in "${SENTINEL_HOSTS[@]}"; do
    s_host="${endpoint%:*}"
    s_port="${endpoint#*:}"
    if redis-cli -h "$s_host" -p "$s_port" ping >/dev/null 2>&1; then
      redis-cli -h "$s_host" -p "$s_port" SENTINEL masters > \
        "$out_dir/masters-${s_host//\./-}-$s_port-$stamp.txt"
      redis-cli -h "$s_host" -p "$s_port" SENTINEL slaves "$MASTER_NAME" > \
        "$out_dir/slaves-${s_host//\./-}-$s_port-$stamp.txt"
    fi
  done
}
# Upload to S3 (if configured): sync a finished backup run when a bucket is
# set and the AWS CLI is installed; upload failure is logged, not fatal.
upload_to_s3() {
  local src="$1"
  if [[ -z "$S3_BUCKET" ]] || ! command -v aws >/dev/null 2>&1; then
    return 0
  fi
  log "Uploading backup to S3"
  if aws s3 sync "$src" "s3://$S3_BUCKET/$(basename "$src")/" \
      --storage-class STANDARD_IA; then
    log "S3 upload completed successfully"
  else
    log "WARNING: S3 upload failed"
  fi
}
# Clean old backups: prune archives and reports past the retention window,
# then drop any directories the deletions left empty.
cleanup_old_backups() {
  log "Cleaning up backups older than $RETENTION_DAYS days"
  local pattern
  for pattern in '*.gz' '*.txt'; do
    find "$BACKUP_DIR" -type f -name "$pattern" -mtime +"$RETENTION_DAYS" -delete
  done
  find "$BACKUP_DIR" -empty -type d -delete
}
# Main backup function: discover the topology via Sentinel, back up every
# node, archive Sentinel state, write a manifest, upload, and prune.
main() {
  local run_stamp
  run_stamp=$(date +%Y%m%d-%H%M%S)
  local kind="${1:-daily}"
  local dest="$BACKUP_DIR/$kind/$run_stamp"

  log "Starting Redis Sentinel cluster backup ($kind)"
  mkdir -p "$dest"

  # Discover master + replicas
  local discovered
  discovered=$(get_all_nodes) || error_exit "Failed to discover Redis nodes"

  # Back up each discovered instance
  local entry
  while IFS= read -r entry; do
    backup_redis_instance "${entry%:*}" "${entry#*:}" "$dest" "$run_stamp"
  done <<< "$discovered"

  backup_sentinel_config "$dest" "$run_stamp"

  # Record what this run contains
  {
    echo "Redis Sentinel Backup Manifest"
    echo "Timestamp: $run_stamp"
    echo "Backup Type: $kind"
    echo "Nodes Backed Up:"
    echo "$discovered"
    echo "Files:"
    find "$dest" -type f -exec ls -lh {} \;
  } > "$dest/manifest.txt"

  upload_to_s3 "$dest"
  cleanup_old_backups

  log "Backup completed successfully"
  echo "Redis Sentinel backup completed successfully at $run_stamp" | \
    mail -s "Redis Backup Success" "$NOTIFY_EMAIL"
}
# Execute main function
main "$@"
Make backup script executable
Set proper permissions for the backup script and create a secure wrapper.
sudo chmod 750 /usr/local/bin/redis-sentinel-backup.sh
sudo chown root:redis-backup /usr/local/bin/redis-sentinel-backup.sh
Create restore script
Implement point-in-time recovery capabilities with validation and rollback options.
#!/bin/bash
# Redis Sentinel Restore Script
# Supports point-in-time recovery and validation
set -euo pipefail

# Configuration
BACKUP_DIR="/var/backups/redis"
LOG_FILE="/var/log/redis-restore-$(date +%Y%m%d-%H%M%S).log"
REDIS_DATA_DIR="/var/lib/redis"
NOTIFY_EMAIL="admin@example.com"

# Log a timestamped message to stdout and append it to the log file.
log() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Log an error and abort the restore.
error_exit() {
  log "ERROR: $1"
  exit 1
}
# List available backups: find every manifest under BACKUP_DIR and print a
# short summary (type/name, timestamp, first few backed-up nodes).
list_backups() {
  echo "Available backups:"
  local manifest run_dir run_name run_type
  while IFS= read -r manifest; do
    run_dir=$(dirname "$manifest")
    run_name=$(basename "$run_dir")
    run_type=$(basename "$(dirname "$run_dir")")
    echo " $run_type/$run_name"
    echo " $(grep 'Timestamp:' "$manifest")"
    echo " $(grep 'Nodes Backed Up:' -A 10 "$manifest" | tail -n +2 | head -5)"
    echo ""
  done < <(find "$BACKUP_DIR" -name "manifest.txt")
}
# Validate backup integrity: manifest present, at least one RDB archive,
# and every compressed file passes a test-decompression.
validate_backup() {
  local backup_path="$1"
  log "Validating backup integrity: $backup_path"
  if [[ ! -f "$backup_path/manifest.txt" ]]; then
    error_exit "Backup manifest not found"
  fi
  # Check for RDB files
  local rdb_files
  rdb_files=$(find "$backup_path" -name "*.rdb.gz" | wc -l)
  if [[ "$rdb_files" -eq 0 ]]; then
    error_exit "No RDB backup files found"
  fi
  log "Found $rdb_files RDB backup files"
  # Run the loop in the current shell (process substitution) instead of a
  # 'find | while' pipeline: in the original, error_exit only terminated
  # the pipeline subshell, so corruption could not reliably abort the
  # whole restore.
  local compressed_file
  while IFS= read -r compressed_file; do
    if ! pigz -t "$compressed_file" >/dev/null 2>&1; then
      error_exit "Corrupted backup file: $compressed_file"
    fi
  done < <(find "$backup_path" -name "*.gz")
  log "Backup validation completed successfully"
}
# Stop Redis services, escalating to SIGTERM then SIGKILL if the systemd
# units leave processes behind.
stop_redis_services() {
  log "Stopping Redis services"
  sudo systemctl stop redis-server redis-sentinel || true
  sleep 5
  # Ensure processes are stopped
  if pgrep -x redis-server >/dev/null || pgrep -x redis-sentinel >/dev/null; then
    log "Force killing remaining Redis processes"
    # pkill accepts a single pattern, so signal each daemon separately
    # (the original passed two names and pkill rejected the extra operand).
    local daemon
    for daemon in redis-server redis-sentinel; do
      sudo pkill -TERM -x "$daemon" || true
    done
    sleep 3
    for daemon in redis-server redis-sentinel; do
      sudo pkill -KILL -x "$daemon" || true
    done
  fi
}
# Start Redis services, then poll PING for up to ~60s before declaring
# the cluster healthy; abort the restore if Redis never answers.
start_redis_services() {
  log "Starting Redis services"
  sudo systemctl start redis-server
  sleep 5
  sudo systemctl start redis-sentinel
  # Wait for services to be ready
  local tries=0
  until redis-cli ping >/dev/null 2>&1; do
    if (( tries >= 30 )); then
      error_exit "Redis services failed to start"
    fi
    sleep 2
    tries=$((tries + 1))
  done
  log "Redis services started successfully"
  return 0
}
# Backup current data into a throwaway safety directory and print ONLY the
# directory path on stdout (the caller captures it with $(...)).
backup_current_data() {
  local backup_dir="/tmp/redis-restore-backup-$(date +%Y%m%d-%H%M%S)"
  # Log to stderr: this function's stdout is captured by the caller
  # (safety_backup=$(backup_current_data)); the original implementation
  # let the log line pollute the captured path.
  log "Creating safety backup of current data: $backup_dir" >&2
  mkdir -p "$backup_dir"
  if [[ -f "$REDIS_DATA_DIR/dump.rdb" ]]; then
    cp "$REDIS_DATA_DIR/dump.rdb" "$backup_dir/"
  fi
  if [[ -f "$REDIS_DATA_DIR/appendonly.aof" ]]; then
    cp "$REDIS_DATA_DIR/appendonly.aof" "$backup_dir/"
  fi
  echo "$backup_dir"
}
# Restore Redis instance
# Restore one instance's RDB (and AOF, if present) from a per-instance
# backup directory into the LOCAL data directory $REDIS_DATA_DIR.
# Each file is decompressed to /tmp, validated with redis-check-rdb /
# redis-check-aof, and only installed if validation passes.
# NOTE(review): target_host/target_port are accepted but never used —
# this function always writes to the local $REDIS_DATA_DIR; confirm
# whether remote-target restore was intended.
restore_redis_instance() {
local instance_backup_dir="$1"
local target_host="${2:-127.0.0.1}"
local target_port="${3:-6379}"
log "Restoring Redis instance from: $instance_backup_dir"
# Find and extract RDB file (newest-first ordering is not guaranteed;
# head -n1 takes whatever find lists first)
local rdb_file
rdb_file=$(find "$instance_backup_dir" -name "dump-*.rdb.gz" | head -n1)
if [[ -n "$rdb_file" ]]; then
log "Restoring RDB file: $(basename "$rdb_file")"
# Extract to temporary location first so a bad archive never touches
# the live data directory
local temp_rdb="/tmp/$(basename "${rdb_file%.gz}")"
pigz -dc "$rdb_file" > "$temp_rdb"
# Validate RDB file before installing it
if redis-check-rdb "$temp_rdb"; then
sudo cp "$temp_rdb" "$REDIS_DATA_DIR/dump.rdb"
sudo chown redis:redis "$REDIS_DATA_DIR/dump.rdb"
sudo chmod 660 "$REDIS_DATA_DIR/dump.rdb"
rm "$temp_rdb"
log "RDB file restored successfully"
else
rm "$temp_rdb"
# An unusable RDB aborts the whole restore
error_exit "Invalid RDB file"
fi
fi
# Find and extract AOF file if available
local aof_file
aof_file=$(find "$instance_backup_dir" -name "appendonly-*.aof.gz" | head -n1)
if [[ -n "$aof_file" ]]; then
log "Restoring AOF file: $(basename "$aof_file")"
# Extract to temporary location first
local temp_aof="/tmp/$(basename "${aof_file%.gz}")"
pigz -dc "$aof_file" > "$temp_aof"
# Validate AOF file; unlike the RDB, a bad AOF is skipped rather than
# fatal (the RDB alone is enough to restore)
if redis-check-aof "$temp_aof"; then
sudo cp "$temp_aof" "$REDIS_DATA_DIR/appendonly.aof"
sudo chown redis:redis "$REDIS_DATA_DIR/appendonly.aof"
sudo chmod 660 "$REDIS_DATA_DIR/appendonly.aof"
rm "$temp_aof"
log "AOF file restored successfully"
else
log "WARNING: Invalid AOF file, skipping"
rm "$temp_aof"
fi
fi
}
# Restore Sentinel configuration files from the backup's sentinel/ directory
# back into /etc/redis, keeping a copy of the live configs in /tmp.
restore_sentinel_config() {
  local backup_path="$1"
  log "Restoring Sentinel configuration"
  local sentinel_backup="$backup_path/sentinel"
  if [[ -d "$sentinel_backup" ]]; then
    # Backup current Sentinel configs so the restore can be rolled back
    sudo cp /etc/redis/sentinel*.conf /tmp/ 2>/dev/null || true
    # Backups are named "<original>.conf-YYYYmmdd-HHMMSS". The original
    # pattern "sentinel.conf-" (no wildcards) never matched anything, and
    # its sed expression stripped the wrong suffix; strip the real
    # 8-digit-date/6-digit-time tail instead.
    local config_backup original_name
    while IFS= read -r config_backup; do
      original_name=$(basename "$config_backup" | sed 's/-[0-9]\{8\}-[0-9]\{6\}$//')
      sudo cp "$config_backup" "/etc/redis/$original_name"
      sudo chown redis:redis "/etc/redis/$original_name"
      sudo chmod 640 "/etc/redis/$original_name"
    done < <(find "$sentinel_backup" -name "sentinel*.conf-*")
    log "Sentinel configuration restored"
  fi
}
# Main restore function: validate the chosen backup, snapshot the current
# data, stop the cluster, restore every instance plus Sentinel config, and
# bring the services back up.
main() {
  if [[ $# -lt 1 ]]; then
    # The first argument (backup path relative to BACKUP_DIR, e.g.
    # "daily/20240101-023000") is required; the original usage line
    # omitted it entirely.
    echo "Usage: $0 <backup_type/timestamp> [target_host] [target_port]"
    echo ""
    list_backups
    exit 1
  fi
  local backup_path="$BACKUP_DIR/$1"
  local target_host="${2:-127.0.0.1}"
  local target_port="${3:-6379}"
  if [[ ! -d "$backup_path" ]]; then
    error_exit "Backup path not found: $backup_path"
  fi
  log "Starting Redis Sentinel cluster restore"
  log "Backup path: $backup_path"
  log "Target: $target_host:$target_port"
  # Validate backup before touching anything
  validate_backup "$backup_path"
  # Create safety backup of the current data for rollback
  local safety_backup
  safety_backup=$(backup_current_data)
  log "Safety backup created: $safety_backup"
  # Stop services
  stop_redis_services
  # Restore each per-instance directory; process substitution keeps the
  # loop in the current shell so a failed restore aborts the script.
  local instance_dir
  while IFS= read -r instance_dir; do
    restore_redis_instance "$instance_dir" "$target_host" "$target_port"
  done < <(find "$backup_path" -type d -name "redis-*")
  # Restore Sentinel configuration
  restore_sentinel_config "$backup_path"
  # Start services
  start_redis_services
  log "Restore completed successfully"
  # Send notification (best effort — a missing MTA must not fail the restore)
  echo "Redis Sentinel restore completed successfully from backup: $1" | \
    mail -s "Redis Restore Success" "$NOTIFY_EMAIL" || true
}
# Execute main function
main "$@"
Set restore script permissions
Configure secure permissions for the restore script.
sudo chmod 750 /usr/local/bin/redis-sentinel-restore.sh
sudo chown root:redis-backup /usr/local/bin/redis-sentinel-restore.sh
Configure automated backup scheduling
Set up cron jobs for automated daily, weekly, and monthly backups.
sudo -u redis-backup crontab -e
# Redis Sentinel Automated Backups
# Daily backup at 2:30 AM
30 2 * * * /usr/local/bin/redis-sentinel-backup.sh daily
# Weekly backup on Sunday at 3:30 AM
30 3 * * 0 /usr/local/bin/redis-sentinel-backup.sh weekly
# Monthly backup on 1st day at 4:30 AM
30 4 1 * * /usr/local/bin/redis-sentinel-backup.sh monthly
# Cleanup logs older than 30 days daily at 1:00 AM
0 1 * * * find /var/backups/redis/logs -name "*.log" -mtime +30 -delete
Create backup monitoring script
Implement monitoring to track backup health and send alerts for failures.
#!/bin/bash
# Redis Backup Monitoring Script
set -euo pipefail

BACKUP_DIR="/var/backups/redis"
LOG_DIR="$BACKUP_DIR/logs"
ALERT_EMAIL="admin@example.com"
MAX_BACKUP_AGE_HOURS=26 # Alert if no backup in 26 hours
MIN_BACKUP_SIZE_MB=1 # Alert if backup smaller than 1MB

# Log a timestamped message to stdout.
log() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') - $1"
}

# Send alert by email (best effort) and log it locally.
send_alert() {
  local subject="$1"
  local message="$2"
  # '|| true' so a missing/broken MTA does not abort the remaining health
  # checks under 'set -e'
  echo "$message" | mail -s "Redis Backup Alert: $subject" "$ALERT_EMAIL" || true
  log "ALERT: $subject - $message"
}
# Check backup freshness: alert when no daily manifest exists or the
# newest one is older than MAX_BACKUP_AGE_HOURS.
check_backup_freshness() {
  local newest
  newest=$(find "$BACKUP_DIR/daily" -name "manifest.txt" -type f -printf '%T@ %p\n' | \
    sort -n | tail -1 | cut -d' ' -f2-)
  if [[ -z "$newest" ]]; then
    send_alert "No Backups Found" "No backup manifests found in $BACKUP_DIR/daily"
    return 1
  fi
  local mtime now age
  mtime=$(stat -c %Y "$newest")
  now=$(date +%s)
  age=$(( (now - mtime) / 3600 ))
  if [[ $age -gt $MAX_BACKUP_AGE_HOURS ]]; then
    send_alert "Backup Too Old" \
      "Latest backup is $age hours old (threshold: $MAX_BACKUP_AGE_HOURS hours)"
    return 1
  fi
  log "Backup freshness OK: latest backup is $age hours old"
  return 0
}
# Check backup sizes: alert on any archive from the last day that is
# smaller than MIN_BACKUP_SIZE_MB; returns non-zero if any were found.
check_backup_sizes() {
  local issues=0
  # Process substitution keeps the loop in the current shell so 'issues'
  # survives; the original 'find | while' ran the loop in a pipeline
  # subshell, the increments were discarded, and the check always passed.
  local backup_file size_mb
  while IFS= read -r backup_file; do
    size_mb=$(du -m "$backup_file" | cut -f1)
    if [[ $size_mb -lt $MIN_BACKUP_SIZE_MB ]]; then
      send_alert "Small Backup File" \
        "Backup file $backup_file is only ${size_mb}MB (threshold: ${MIN_BACKUP_SIZE_MB}MB)"
      issues=$((issues + 1))
    fi
  done < <(find "$BACKUP_DIR/daily" -name "*.gz" -mtime -1)
  if [[ $issues -eq 0 ]]; then
    log "Backup sizes OK"
    return 0
  else
    return 1
  fi
}
# Check backup integrity: test-decompress every archive in the newest
# daily backup directory; returns non-zero if any archive is corrupt.
check_backup_integrity() {
  local latest_backup_dir
  latest_backup_dir=$(find "$BACKUP_DIR/daily" -maxdepth 1 -type d | sort | tail -1)
  if [[ -z "$latest_backup_dir" || ! -d "$latest_backup_dir" ]]; then
    send_alert "No Backup Directory" "No valid backup directory found"
    return 1
  fi
  local corrupted_files=0
  # Test compressed files. Process substitution keeps the counter in the
  # current shell; the original 'find | while' subshell discarded the
  # increments, so corruption was alerted but never reflected in the
  # return code.
  local gz_file
  while IFS= read -r gz_file; do
    if ! pigz -t "$gz_file" >/dev/null 2>&1; then
      send_alert "Corrupted Backup" "Backup file $gz_file is corrupted"
      corrupted_files=$((corrupted_files + 1))
    fi
  done < <(find "$latest_backup_dir" -name "*.gz")
  if [[ $corrupted_files -eq 0 ]]; then
    log "Backup integrity OK"
    return 0
  else
    return 1
  fi
}
# Check disk space on the backup filesystem: alert above 90% usage,
# warn above 80%, otherwise report OK.
check_disk_space() {
  local pct
  pct=$(df "$BACKUP_DIR" | awk 'NR==2 {print $5}' | sed 's/%//')
  if [[ $pct -gt 90 ]]; then
    send_alert "Low Disk Space" \
      "Backup partition is ${pct}% full"
    return 1
  fi
  if [[ $pct -gt 80 ]]; then
    log "WARNING: Backup partition is ${pct}% full"
  else
    log "Disk space OK: ${pct}% used"
  fi
  return 0
}
# Check backup logs for errors: alert (with the matching lines) when any
# log from the last day contains "ERROR".
check_backup_logs() {
  local recent_errors
  recent_errors=$(find "$LOG_DIR" -name "*.log" -mtime -1 -exec grep -l "ERROR" {} \; | wc -l)
  if [[ $recent_errors -gt 0 ]]; then
    local error_details
    error_details=$(find "$LOG_DIR" -name "*.log" -mtime -1 -exec grep "ERROR" {} \;)
    # Use a real newline ($'\n'): the original embedded a literal "\n"
    # because plain echo does not interpret backslash escapes.
    send_alert "Backup Errors Detected" \
      "Found errors in recent backup logs:"$'\n'"$error_details"
    return 1
  fi
  log "No errors found in recent backup logs"
  return 0
}
# Generate backup report: write a daily status summary (counts, sizes,
# latest runs, disk usage) into BACKUP_DIR.
generate_backup_report() {
  local report_file="$BACKUP_DIR/backup-report-$(date +%Y%m%d).txt"
  {
    echo "Redis Sentinel Backup Status Report"
    echo "Generated: $(date)"
    # printf, so the separator and blank lines are real newlines; the
    # original 'echo "...\n"' printed a literal backslash-n.
    printf '====================================\n\n'
    echo "Backup Statistics:"
    echo "- Daily backups: $(find "$BACKUP_DIR/daily" -name "manifest.txt" | wc -l)"
    echo "- Weekly backups: $(find "$BACKUP_DIR/weekly" -name "manifest.txt" | wc -l)"
    echo "- Monthly backups: $(find "$BACKUP_DIR/monthly" -name "manifest.txt" | wc -l)"
    echo "- Total backup size: $(du -sh "$BACKUP_DIR" | cut -f1)"
    printf '\nLatest Backups:\n'
    find "$BACKUP_DIR" -name "manifest.txt" -printf '%T+ %p\n' | \
      sort -r | head -5 | while read -r line; do
      echo " $line"
    done
    printf '\nDisk Usage:\n'
    df -h "$BACKUP_DIR"
  } > "$report_file"
  log "Backup report generated: $report_file"
}
# Main monitoring function: run all five health checks, alert when any
# fail, and emit the daily report during the 6 AM run.
main() {
  log "Starting Redis backup monitoring"
  local checks_passed=0
  local total_checks=5
  # Increment with plain assignment, NOT '((checks_passed++))': the
  # arithmetic command returns status 1 when the pre-increment value is 0,
  # which killed the script under 'set -e' right after the first passing
  # check.
  check_backup_freshness && checks_passed=$((checks_passed + 1))
  check_backup_sizes && checks_passed=$((checks_passed + 1))
  check_backup_integrity && checks_passed=$((checks_passed + 1))
  check_disk_space && checks_passed=$((checks_passed + 1))
  check_backup_logs && checks_passed=$((checks_passed + 1))
  if [[ $checks_passed -eq $total_checks ]]; then
    log "All backup health checks passed ($checks_passed/$total_checks)"
  else
    local failed_checks=$((total_checks - checks_passed))
    send_alert "Health Check Failures" \
      "$failed_checks out of $total_checks backup health checks failed"
  fi
  # Generate daily report
  if [[ "$(date +%H)" == "06" ]]; then # Generate at 6 AM
    generate_backup_report
  fi
  log "Backup monitoring completed"
}
# Execute main function
main "$@"
Set monitoring script permissions and schedule
Configure permissions and add monitoring to crontab for regular health checks.
sudo chmod 750 /usr/local/bin/redis-backup-monitor.sh
sudo chown root:redis-backup /usr/local/bin/redis-backup-monitor.sh
sudo -u redis-backup crontab -e
Add monitoring to cron
Add backup monitoring checks to the existing crontab.
# Backup health monitoring every 2 hours
0 */2 * * * /usr/local/bin/redis-backup-monitor.sh
Configure Prometheus metrics export
Create a metrics exporter for backup monitoring integration with Prometheus.
#!/bin/bash
# Redis Backup Metrics Exporter for Prometheus
set -euo pipefail

BACKUP_DIR="/var/backups/redis"
METRICS_FILE="/var/lib/node_exporter/textfile_collector/redis_backup.prom"
METRICS_TMP="${METRICS_FILE}.tmp"

# Create metrics directory
mkdir -p "$(dirname "$METRICS_FILE")"

# Generate metrics into a temp file first so scrapes never see a partial file
{
  echo "# HELP redis_backup_last_success_timestamp Last successful backup timestamp"
  echo "# TYPE redis_backup_last_success_timestamp gauge"
  # 'latest_backup' holds the epoch mtime of the newest daily manifest
  # (first field of find's '%T@ %p' output), not a path
  latest_backup=$(find "$BACKUP_DIR/daily" -name "manifest.txt" -type f -printf '%T@ %p\n' | \
    sort -n | tail -1 | cut -d' ' -f1)
  if [[ -n "$latest_backup" ]]; then
    echo "redis_backup_last_success_timestamp ${latest_backup%.*}"
  else
    echo "redis_backup_last_success_timestamp 0"
  fi
  echo "# HELP redis_backup_size_bytes Total backup size in bytes"
  echo "# TYPE redis_backup_size_bytes gauge"
  backup_size=$(du -sb "$BACKUP_DIR" | cut -f1)
  echo "redis_backup_size_bytes $backup_size"
  echo "# HELP redis_backup_count_total Total number of backups"
  echo "# TYPE redis_backup_count_total gauge"
  daily_count=$(find "$BACKUP_DIR/daily" -name "manifest.txt" | wc -l)
  weekly_count=$(find "$BACKUP_DIR/weekly" -name "manifest.txt" | wc -l)
  monthly_count=$(find "$BACKUP_DIR/monthly" -name "manifest.txt" | wc -l)
  echo "redis_backup_count_total{type=\"daily\"} $daily_count"
  echo "redis_backup_count_total{type=\"weekly\"} $weekly_count"
  echo "redis_backup_count_total{type=\"monthly\"} $monthly_count"
  echo "# HELP redis_backup_age_hours Age of latest backup in hours"
  echo "# TYPE redis_backup_age_hours gauge"
  if [[ -n "$latest_backup" ]]; then
    current_time=$(date +%s)
    age_hours=$(( (current_time - ${latest_backup%.*}) / 3600 ))
    echo "redis_backup_age_hours $age_hours"
  else
    echo "redis_backup_age_hours -1"
  fi
} > "$METRICS_TMP"

# Atomically replace metrics file so Prometheus never scrapes a partial file
mv "$METRICS_TMP" "$METRICS_FILE"
echo "Backup metrics updated: $METRICS_FILE"
Schedule metrics export
Add metrics generation to crontab for Prometheus integration.
sudo chmod 755 /usr/local/bin/redis-backup-metrics.sh
sudo -u redis-backup crontab -e
# Export backup metrics for Prometheus every 5 minutes
*/5 * * * * /usr/local/bin/redis-backup-metrics.sh
Verify your setup
Test your backup and restore procedures to ensure everything works correctly.
# Test manual backup
sudo -u redis-backup /usr/local/bin/redis-sentinel-backup.sh daily
# Check backup was created
ls -la /var/backups/redis/daily/
# View backup manifest
cat /var/backups/redis/daily/*/manifest.txt
# Test backup monitoring
sudo -u redis-backup /usr/local/bin/redis-backup-monitor.sh
# Check cron jobs are scheduled
sudo -u redis-backup crontab -l
# Verify Redis Sentinel is working
redis-cli -p 26379 SENTINEL masters
redis-cli -p 26379 SENTINEL slaves mymaster
# Test metrics export
/usr/local/bin/redis-backup-metrics.sh
cat /var/lib/node_exporter/textfile_collector/redis_backup.prom
For monitoring integration, you can link this setup with comprehensive Redis monitoring using Prometheus and Grafana for complete observability.
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Backup script fails with permission denied | Incorrect file permissions or ownership | sudo chown redis-backup:redis-backup /var/backups/redis && sudo chmod 750 backup script |
| Restore fails with "RDB file corrupted" | Incomplete backup or storage corruption | Use redis-check-rdb to validate and try different backup |
| Sentinel doesn't recognize restored master | Replication configuration mismatch | Reconfigure replication with SLAVEOF and restart Sentinel |
| Backup size is unexpectedly small | Redis memory optimization or empty dataset | Check Redis memory usage with INFO memory and dataset size |
| S3 upload fails | AWS credentials or network connectivity issues | Verify AWS CLI configuration with aws sts get-caller-identity |
| Monitoring alerts not received | Mail service not configured | Install and configure postfix: sudo apt install mailutils postfix |
| Cron jobs not running | Cron service disabled or user permissions | Enable cron: sudo systemctl enable --now cron and check user permissions |
| Backup restoration takes too long | Large dataset or slow disk I/O | Use a lower compression level (e.g. pigz -1 instead of pigz -9) and consider SSD storage |
Next steps
- Configure Redis Sentinel with SSL/TLS encryption and authentication for enhanced security
- Implement custom Prometheus exporters to enhance backup monitoring
- Set up remote backup storage with S3-compatible encryption for offsite backup storage
- Configure Redis Sentinel cross-datacenter replication for disaster recovery
- Implement automated failover testing for Redis Sentinel to validate backup procedures
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
set -euo pipefail
# Redis Sentinel Backup and Restore Installation Script
# Supports Ubuntu, Debian, AlmaLinux, Rocky Linux, CentOS, RHEL
# Colors for output (ANSI escapes; NC resets to the default color)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Configuration
BACKUP_USER="redis-backup"        # dedicated unprivileged account that owns the backups
BACKUP_DIR="/var/backups/redis"   # root of the daily/weekly/monthly backup tree
SCRIPT_DIR="/opt/redis-backup"    # where the generated backup/restore scripts are installed
LOG_DIR="/var/log/redis-backup"   # per-run backup logs
SERVICE_NAME="redis-backup"       # name of the oneshot systemd unit created below
# Detect distribution and set package manager
if [ -f /etc/os-release ]; then
# Sourcing os-release defines $ID (e.g. "ubuntu", "rocky", "amzn")
. /etc/os-release
# Pick the package-manager commands plus the distro-specific Redis
# config directory and service name
case "$ID" in
ubuntu|debian)
PKG_MGR="apt"
PKG_UPDATE="apt update"
PKG_INSTALL="apt install -y"
REDIS_CONF_DIR="/etc/redis"
REDIS_SERVICE="redis-server"
;;
almalinux|rocky|centos|rhel|ol|fedora)
PKG_MGR="dnf"
# check-update exits non-zero when updates exist; '|| true' keeps set -e happy
PKG_UPDATE="dnf check-update || true"
PKG_INSTALL="dnf install -y"
REDIS_CONF_DIR="/etc/redis"
REDIS_SERVICE="redis"
;;
amzn)
PKG_MGR="yum"
PKG_UPDATE="yum check-update || true"
PKG_INSTALL="yum install -y"
# NOTE(review): assumes Amazon Linux puts redis.conf directly under
# /etc — confirm for the target AMI/release
REDIS_CONF_DIR="/etc"
REDIS_SERVICE="redis"
;;
*)
echo -e "${RED}Error: Unsupported distribution: $ID${NC}"
exit 1
;;
esac
else
echo -e "${RED}Error: Cannot detect distribution${NC}"
exit 1
fi
# --- logging helpers -------------------------------------------------------
log_info()  { echo -e "${GREEN}[INFO]${NC} $1"; }
log_warn()  { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }

# Roll back a partial install when any command fails (fires via ERR trap).
cleanup() {
  if [ $? -ne 0 ]; then
    log_error "Installation failed. Cleaning up..."
    systemctl stop "$SERVICE_NAME" 2>/dev/null || true
    rm -f "/etc/systemd/system/${SERVICE_NAME}.service"
    systemctl daemon-reload
  fi
}
trap cleanup ERR

# Abort early unless running as root on a systemd host.
check_prerequisites() {
  if [ "$EUID" -ne 0 ]; then
    log_error "This script must be run as root"
    exit 1
  fi
  if ! command -v systemctl &> /dev/null; then
    log_error "systemctl is required but not available"
    exit 1
  fi
}
# Install the backup toolchain (redis CLI tools, AWS/S3 clients,
# compressors, cron, and a mail client for alerts).
install_packages() {
echo -e "${BLUE}[1/8]${NC} Installing required packages..."
$PKG_UPDATE
if [ "$PKG_MGR" = "apt" ]; then
$PKG_INSTALL redis-tools awscli s3cmd gzip pigz cron mailutils
else
# Enable EPEL for additional packages on RHEL-based systems
# NOTE(review): pigz/s3cmd typically come from EPEL and awscli
# availability varies by release — confirm for the target distro version
if [ "$ID" = "centos" ] || [ "$ID" = "rhel" ] || [ "$ID" = "almalinux" ] || [ "$ID" = "rocky" ]; then
$PKG_INSTALL epel-release
fi
$PKG_INSTALL redis awscli s3cmd gzip pigz cronie mailx
# cronie does not auto-start on RHEL-family systems
systemctl enable --now crond
fi
log_info "Packages installed successfully"
}
# Create the dedicated backup account and the directory layout it owns.
create_backup_user() {
  echo -e "${BLUE}[2/8]${NC} Creating backup user and directories..."
  if ! id "$BACKUP_USER" &>/dev/null; then
    useradd -r -s /bin/false -d "$BACKUP_DIR" "$BACKUP_USER"
  fi
  local sub
  for sub in daily weekly monthly snapshots; do
    mkdir -p "$BACKUP_DIR/$sub"
  done
  mkdir -p "$LOG_DIR" "$SCRIPT_DIR"
  chown -R "$BACKUP_USER:$BACKUP_USER" "$BACKUP_DIR" "$LOG_DIR"
  chmod 750 "$BACKUP_DIR"
  chmod 755 "$LOG_DIR"
  log_info "Backup user and directories created"
}
# Write the backup script to $SCRIPT_DIR. The heredoc delimiter is quoted,
# so the generated script is emitted verbatim (no expansion at install time).
create_backup_script() {
  echo -e "${BLUE}[3/8]${NC} Creating backup script..."
  cat > "$SCRIPT_DIR/redis-backup.sh" << 'EOF'
#!/bin/bash
set -euo pipefail
# Configuration
BACKUP_DIR="/var/backups/redis"
LOG_FILE="/var/log/redis-backup/backup-$(date +%Y%m%d-%H%M%S).log"
RETENTION_DAYS=30
S3_BUCKET="${S3_BUCKET:-}"
NOTIFY_EMAIL="${NOTIFY_EMAIL:-root@localhost}"
# Redis Sentinel configuration
SENTINEL_HOSTS=("127.0.0.1:26379")
MASTER_NAME="mymaster"

log() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

error_exit() {
  log "ERROR: $1"
  echo "Redis backup failed: $1" | mail -s "Redis Backup Alert" "$NOTIFY_EMAIL" || true
  exit 1
}

# Ask a Sentinel for the current master address; prints "host:port".
get_master_info() {
  local sentinel_host="$1"
  local host port
  for attempt in {1..3}; do
    if master_info=$(redis-cli -h "${sentinel_host%:*}" -p "${sentinel_host#*:}" \
        SENTINEL get-master-addr-by-name "$MASTER_NAME" 2>/dev/null); then
      host=$(echo "$master_info" | head -n1)
      port=$(echo "$master_info" | tail -n1)
      echo "$host:$port"
      return 0
    fi
    log "Attempt $attempt failed for sentinel $sentinel_host"
    sleep 2
  done
  return 1
}

# Back up one node: BGSAVE, wait for completion, archive RDB/AOF, upload.
backup_redis_node() {
  local node="$1"
  local host="${node%:*}"
  local port="${node#*:}"
  local timestamp
  timestamp=$(date +%Y%m%d-%H%M%S)
  local backup_file="$BACKUP_DIR/daily/redis-${host}-${port}-${timestamp}"
  log "Starting backup for Redis node $node"
  # Record the last completed save, then trigger a new background save
  local last_save
  last_save=$(redis-cli -h "$host" -p "$port" LASTSAVE)
  if ! redis-cli -h "$host" -p "$port" BGSAVE; then
    error_exit "Failed to trigger BGSAVE for $node"
  fi
  # Wait until LASTSAVE advances past the pre-BGSAVE value. (The original
  # compared two immediate LASTSAVE samples -- effectively a value to
  # itself -- which either raced or looped forever.)
  while [ "$(redis-cli -h "$host" -p "$port" LASTSAVE)" = "$last_save" ]; do
    sleep 1
  done
  # Copy RDB file
  local redis_dir rdb_file
  redis_dir=$(redis-cli -h "$host" -p "$port" CONFIG GET dir | tail -n1)
  rdb_file=$(redis-cli -h "$host" -p "$port" CONFIG GET dbfilename | tail -n1)
  if [ -f "$redis_dir/$rdb_file" ]; then
    cp "$redis_dir/$rdb_file" "${backup_file}.rdb"
    pigz "${backup_file}.rdb"
    log "RDB backup completed for $node"
  fi
  # Copy AOF file if enabled
  if [ "$(redis-cli -h "$host" -p "$port" CONFIG GET appendonly | tail -n1)" = "yes" ]; then
    local aof_file
    aof_file=$(redis-cli -h "$host" -p "$port" CONFIG GET appendfilename | tail -n1)
    if [ -f "$redis_dir/$aof_file" ]; then
      cp "$redis_dir/$aof_file" "${backup_file}.aof"
      pigz "${backup_file}.aof"
      log "AOF backup completed for $node"
    fi
  fi
  # Upload to S3 if configured (best effort)
  if [ -n "$S3_BUCKET" ]; then
    aws s3 cp "${backup_file}.rdb.gz" "s3://$S3_BUCKET/$(basename "${backup_file}.rdb.gz")" || log "S3 upload failed"
    [ -f "${backup_file}.aof.gz" ] && aws s3 cp "${backup_file}.aof.gz" "s3://$S3_BUCKET/$(basename "${backup_file}.aof.gz")" || true
  fi
}

cleanup_old_backups() {
  log "Cleaning up backups older than $RETENTION_DAYS days"
  find "$BACKUP_DIR/daily" -name "redis-*" -mtime +$RETENTION_DAYS -delete
  find "$BACKUP_DIR/weekly" -name "redis-*" -mtime +$((RETENTION_DAYS * 4)) -delete
  find "$BACKUP_DIR/monthly" -name "redis-*" -mtime +$((RETENTION_DAYS * 12)) -delete
}

main() {
  log "Starting Redis Sentinel cluster backup"
  # Get master address
  local master_addr=""
  for sentinel in "${SENTINEL_HOSTS[@]}"; do
    if master_addr=$(get_master_info "$sentinel"); then
      break
    fi
  done
  [ -z "$master_addr" ] && error_exit "Could not determine master address"
  # Backup master
  backup_redis_node "$master_addr"
  # Get and backup slaves
  local master_host="${master_addr%:*}"
  local master_port="${master_addr#*:}"
  if slaves=$(redis-cli -h "$master_host" -p "$master_port" INFO replication | grep "^slave[0-9]"); then
    while IFS= read -r slave_line; do
      local slave_ip slave_port
      slave_ip=$(echo "$slave_line" | cut -d, -f1 | cut -d= -f2)
      slave_port=$(echo "$slave_line" | cut -d, -f2 | cut -d= -f2)
      backup_redis_node "$slave_ip:$slave_port"
    done <<< "$slaves"
  fi
  cleanup_old_backups
  log "Backup completed successfully"
}

main "$@"
EOF
  chmod 750 "$SCRIPT_DIR/redis-backup.sh"
  chown "$BACKUP_USER:$BACKUP_USER" "$SCRIPT_DIR/redis-backup.sh"
  log_info "Backup script created"
}
# Write the restore script to $SCRIPT_DIR (verbatim, quoted heredoc).
create_restore_script() {
  echo -e "${BLUE}[4/8]${NC} Creating restore script..."
  cat > "$SCRIPT_DIR/redis-restore.sh" << 'EOF'
#!/bin/bash
set -euo pipefail
usage() {
  echo "Usage: $0 -f <backup_file> -h <redis_host> -p <redis_port> [-t rdb|aof]"
  exit 1
}
while getopts "f:h:p:t:" opt; do
  case $opt in
    f) BACKUP_FILE="$OPTARG" ;;
    h) REDIS_HOST="$OPTARG" ;;
    p) REDIS_PORT="$OPTARG" ;;
    t) BACKUP_TYPE="$OPTARG" ;;
    *) usage ;;
  esac
done
# Explicit 'if' is required here: the original one-liner
# '[ -z a ] || [ -z b ] || [ -z c ] && usage' returned status 1 when all
# arguments WERE supplied, which aborted the script under 'set -e'.
if [ -z "${BACKUP_FILE:-}" ] || [ -z "${REDIS_HOST:-}" ] || [ -z "${REDIS_PORT:-}" ]; then
  usage
fi
BACKUP_TYPE="${BACKUP_TYPE:-rdb}"
SERVICE_NAME="redis"
if [ -f /etc/os-release ]; then
  . /etc/os-release
  case "$ID" in
    ubuntu|debian) SERVICE_NAME="redis-server" ;;
    *) SERVICE_NAME="redis" ;;
  esac
fi
log() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') - $1"
}
if [ ! -f "$BACKUP_FILE" ]; then
  log "ERROR: Backup file $BACKUP_FILE not found"
  exit 1
fi
log "Starting restore process"
# Get Redis configuration (query while the server is still running)
REDIS_DIR=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" CONFIG GET dir | tail -n1)
RDB_FILE=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" CONFIG GET dbfilename | tail -n1)
# Stop Redis service
log "Stopping Redis service"
systemctl stop "$SERVICE_NAME"
# Restore backup
if [[ "$BACKUP_FILE" == *.gz ]]; then
  gunzip -c "$BACKUP_FILE" > "$REDIS_DIR/$RDB_FILE"
else
  cp "$BACKUP_FILE" "$REDIS_DIR/$RDB_FILE"
fi
chown redis:redis "$REDIS_DIR/$RDB_FILE"
chmod 640 "$REDIS_DIR/$RDB_FILE"
# Start Redis service
log "Starting Redis service"
systemctl start "$SERVICE_NAME"
log "Restore completed successfully"
EOF
  chmod 750 "$SCRIPT_DIR/redis-restore.sh"
  chown root:root "$SCRIPT_DIR/redis-restore.sh"
  log_info "Restore script created"
}
# Append backup-friendly persistence settings to redis.conf (after saving a
# dated copy). Idempotent: re-running the installer must not append the
# directives a second time.
configure_redis_backup() {
  echo -e "${BLUE}[5/8]${NC} Configuring Redis for backup..."
  if [ -f "$REDIS_CONF_DIR/redis.conf" ]; then
    REDIS_CONF="$REDIS_CONF_DIR/redis.conf"
  elif [ -f "/etc/redis.conf" ]; then
    REDIS_CONF="/etc/redis.conf"
  else
    log_warn "Redis configuration file not found, skipping automatic configuration"
    return
  fi
  # Guard against duplicate appends on re-run (the original appended the
  # same block every time the installer was executed)
  if grep -q '^# Redis backup configuration' "$REDIS_CONF"; then
    log_info "Redis configuration already updated"
    return
  fi
  # Backup original config
  cp "$REDIS_CONF" "${REDIS_CONF}.backup-$(date +%Y%m%d)"
  # Add backup-friendly settings
  cat >> "$REDIS_CONF" << EOF
# Redis backup configuration
save 900 1
save 300 10
save 60 10000
rdbcompression yes
rdbchecksum yes
stop-writes-on-bgsave-error yes
rdb-save-incremental-fsync yes
appendonly yes
appendfsync everysec
EOF
  log_info "Redis configuration updated"
}
# Install a oneshot systemd unit so a backup can also be triggered with
# 'systemctl start redis-backup'.
create_systemd_service() {
  echo -e "${BLUE}[6/8]${NC} Creating systemd service..."
  local unit_file="/etc/systemd/system/${SERVICE_NAME}.service"
  cat > "$unit_file" << EOF
[Unit]
Description=Redis Backup Service
After=network.target
[Service]
Type=oneshot
User=$BACKUP_USER
ExecStart=$SCRIPT_DIR/redis-backup.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
EOF
  systemctl daemon-reload
  log_info "Systemd service created"
}
setup_cron() {
echo -e "${BLUE}[7/8]${NC} Setting up cron jobs..."
# Daily backup at 2 AM
echo "0 2 * * * $BACKUP_USER $SCRIPT_DIR/redis-backup.sh > /dev/null
Review the script before running. Execute with: bash install.sh