Configure a high-performance Varnish Cache 7 cluster with intelligent load balancing across multiple web servers, SSL termination, health monitoring, and shared cache synchronization for enterprise web acceleration.
Prerequisites
- Root access to multiple servers
- Basic understanding of HTTP caching
- Network connectivity between cluster nodes
- SSL certificates for HTTPS termination
What this solves
This tutorial shows you how to deploy a production-ready Varnish Cache 7 cluster that distributes traffic across multiple backend servers while sharing cache data between Varnish nodes. You'll implement advanced load balancing algorithms, health checks, SSL termination, and performance monitoring to achieve sub-second response times even under heavy traffic loads.
Step-by-step configuration
Update system packages
Start by ensuring all system packages are current to get the latest security updates.
sudo apt update && sudo apt upgrade -y
Install Varnish Cache 7
Add the official Varnish repository and install the latest stable version with development tools.
curl -fsSL https://packagecloud.io/varnishcache/varnish70/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/varnish.gpg
echo "deb [signed-by=/usr/share/keyrings/varnish.gpg] https://packagecloud.io/varnishcache/varnish70/ubuntu/ $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/varnish.list
sudo apt update
sudo apt install -y varnish varnish-dev
Configure systemd service parameters
Create a systemd override file to configure Varnish with proper memory allocation and network binding.
sudo mkdir -p /etc/systemd/system/varnish.service.d
sudo tee /etc/systemd/system/varnish.service.d/override.conf > /dev/null << 'EOF'
[Service]
ExecStart=
ExecStart=/usr/sbin/varnishd -a :80 -a :443,PROXY -T localhost:6082 -f /etc/varnish/default.vcl -S /etc/varnish/secret -s malloc,4G -p thread_pool_min=100 -p thread_pool_max=5000 -p listen_depth=2048
EOF
Create cluster configuration with multiple backends
Configure the main VCL file with backend definitions, health checks, and load balancing logic.
vcl 4.1;
import std;
import directors;
# Backend web servers
backend web1 {
.host = "10.0.1.10";
.port = "8080";
.probe = {
.url = "/health";
.timeout = 2s;
.interval = 10s;
.window = 5;
.threshold = 3;
.initial = 3;
};
.connect_timeout = 2s;
.first_byte_timeout = 10s;
.between_bytes_timeout = 2s;
.max_connections = 300;
}
backend web2 {
.host = "10.0.1.11";
.port = "8080";
.probe = {
.url = "/health";
.timeout = 2s;
.interval = 10s;
.window = 5;
.threshold = 3;
.initial = 3;
};
.connect_timeout = 2s;
.first_byte_timeout = 10s;
.between_bytes_timeout = 2s;
.max_connections = 300;
}
backend web3 {
.host = "10.0.1.12";
.port = "8080";
.probe = {
.url = "/health";
.timeout = 2s;
.interval = 10s;
.window = 5;
.threshold = 3;
.initial = 3;
};
.connect_timeout = 2s;
.first_byte_timeout = 10s;
.between_bytes_timeout = 2s;
.max_connections = 300;
}
# Initialize load balancer with round-robin algorithm
sub vcl_init {
new vdir = directors.round_robin();
vdir.add_backend(web1);
vdir.add_backend(web2);
vdir.add_backend(web3);
}
sub vcl_recv {
    # Route every request through the round-robin director built in vcl_init.
    set req.backend_hint = vdir.backend();

    # Handle purge requests from trusted IPs.
    # This check runs first: if the static-asset shortcut below ran before it,
    # a PURGE for e.g. /static/app.css would be treated as a normal lookup and
    # the object could never be purged.
    if (req.method == "PURGE") {
        if (client.ip !~ purge_acl) {
            return(synth(405, "Purging not allowed for " + client.ip));
        }
        return(purge);
    }

    # Only GET and HEAD may be answered from cache. Sending POST/PUT/DELETE to
    # return(hash) would turn them into cache lookups and drop their effect.
    if (req.method != "GET" && req.method != "HEAD") {
        return(pass);
    }

    # Tell the backend which scheme/port the request arrived on, based on the
    # local listening port.
    if (std.port(server.ip) == 443) {
        set req.http.X-Forwarded-Proto = "https";
        set req.http.X-Forwarded-Port = "443";
    } else {
        set req.http.X-Forwarded-Proto = "http";
        set req.http.X-Forwarded-Port = "80";
    }

    # Client address: X-Forwarded-For is deliberately left alone. Varnish
    # appends client.ip to it before vcl_recv runs, so the chain recorded by an
    # upstream proxy is preserved. Overwriting it with client.ip would replace
    # the real client with the proxy's own address.
    if (!req.http.X-Real-IP) {
        set req.http.X-Real-IP = client.ip;
    }

    # Normalize Accept-Encoding so equivalent client headers share one cache
    # variant. This runs before the static-asset shortcut so that the
    # "already compressed" branch is reachable for images.
    if (req.http.Accept-Encoding) {
        if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg)$") {
            unset req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            unset req.http.Accept-Encoding;
        }
    }

    # Static assets never depend on cookies: strip them and look up the cache.
    if (req.url ~ "\.(css|js|png|gif|jp(e)?g|swf|ico|pdf|mov|fla|zip|rar)$") {
        unset req.http.cookie;
        return(hash);
    }

    # Authenticated or session-bound requests are user specific and must not be
    # served from, or stored in, the shared cache.
    # NOTE(review): the session cookie pattern is an example; adjust it to the
    # cookie names your application actually uses.
    if (req.http.Authorization || req.http.Cookie ~ "SESS|session") {
        return(pass);
    }

    return(hash);
}
# ACL for purge requests
acl purge_acl {
"127.0.0.1";
"10.0.1.0/24";
}
sub vcl_backend_response {
# Set cache TTL based on content type
if (beresp.http.content-type ~ "^(text/css|application/javascript|text/javascript)") {
set beresp.ttl = 1w;
set beresp.http.Cache-Control = "public, max-age=604800";
}
if (beresp.http.content-type ~ "^image/") {
set beresp.ttl = 1d;
set beresp.http.Cache-Control = "public, max-age=86400";
}
# Handle backend errors
if (beresp.status >= 400) {
set beresp.ttl = 0s;
set beresp.grace = 15s;
return(deliver);
}
# Enable grace mode for stale content
set beresp.grace = 1h;
# Remove backend server headers
unset beresp.http.Server;
unset beresp.http.X-Powered-By;
return(deliver);
}
sub vcl_deliver {
# Add cache status headers
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
} else {
set resp.http.X-Cache = "MISS";
}
# Add backend information for debugging
set resp.http.X-Served-By = server.hostname;
# Security headers
set resp.http.X-Frame-Options = "SAMEORIGIN";
set resp.http.X-Content-Type-Options = "nosniff";
set resp.http.X-XSS-Protection = "1; mode=block";
# Remove internal headers
unset resp.http.X-Varnish;
unset resp.http.Via;
unset resp.http.Age;
return(deliver);
}
sub vcl_backend_error {
    # Called when a backend fetch fails outright (connect error, timeout, ...).
    # beresp here is the synthetic error response Varnish generated, so the
    # original "status is 5xx -> abandon" test always matched and the
    # return(deliver) below it was unreachable.
    #
    # Stale objects inside their grace window are served on the client side
    # without involving this subroutine, so nothing here needs to "serve
    # stale". Instead, retry a limited number of times so the director can
    # pick another backend, then deliver the synthetic error.
    if (bereq.retries < 2) {
        return(retry);
    }
    return(deliver);
}
Configure SSL termination with HAProxy
Install HAProxy to handle SSL termination and proxy to Varnish cluster nodes.
sudo apt install -y haproxy
Create HAProxy configuration for SSL and clustering
Configure HAProxy to terminate SSL and distribute traffic across multiple Varnish nodes.
global
log 127.0.0.1:514 local0
chroot /var/lib/haproxy
stats socket /run/haproxy/admin.sock mode 660 level admin
stats timeout 30s
user haproxy
group haproxy
daemon
# SSL Configuration
ssl-default-bind-ciphers ECDHE+AESGCM:ECDHE+CHACHA20:RSA+AESGCM:RSA+AES:!aNULL:!MD5:!DSS
ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets
# Performance tuning
tune.ssl.default-dh-param 2048
tune.bufsize 32768
tune.maxrewrite 8192
defaults
mode http
log global
option httplog
option dontlognull
option log-health-checks
option forwardfor
option http-server-close
timeout connect 5000
timeout client 50000
timeout server 50000
errorfile 400 /etc/haproxy/errors/400.http
errorfile 403 /etc/haproxy/errors/403.http
errorfile 408 /etc/haproxy/errors/408.http
errorfile 500 /etc/haproxy/errors/500.http
errorfile 502 /etc/haproxy/errors/502.http
errorfile 503 /etc/haproxy/errors/503.http
errorfile 504 /etc/haproxy/errors/504.http
frontend varnish_cluster
    bind *:80
    bind *:443 ssl crt /etc/ssl/certs/example.com.pem

    # Rules are listed in the order HAProxy evaluates them. The original mixed
    # a legacy "redirect" line in before the http-request rules; HAProxy still
    # processes http-request rules first and warns about it, so the written
    # order was misleading.

    # Rate limiting: track request rate per source IP, drop abusive clients
    stick-table type ip size 100k expire 30s store http_req_rate(10s),http_err_rate(10s)
    http-request track-sc0 src
    http-request reject if { sc_http_req_rate(0) gt 50 }

    # Health check endpoint (answered by HAProxy itself, over HTTP or HTTPS)
    acl health_check path_beg /haproxy-health
    http-request return status 200 content-type text/plain string "OK\n" if health_check

    # Redirect HTTP to HTTPS
    http-request redirect scheme https unless { ssl_fc }

    # Security headers (HSTS is only meaningful on HTTPS responses)
    http-response set-header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload" if { ssl_fc }
    http-response set-header X-Frame-Options "SAMEORIGIN"
    http-response set-header X-Content-Type-Options "nosniff"

    default_backend varnish_nodes
backend varnish_nodes
    balance roundrobin

    # Health check configuration.
    # The escaped "option httpchk GET ... \r\nHost:\ ..." form is deprecated;
    # the request is described with "http-check send" instead (HAProxy 2.2+).
    # The trailing "http-check connect" was removed: it opened a second check
    # connection with no send/expect rule after it.
    option httpchk
    http-check send meth GET uri /health ver HTTP/1.1 hdr Host localhost
    http-check expect status 200

    # Varnish cluster nodes
    server varnish1 10.0.2.10:80 check inter 2000 rise 2 fall 3 weight 100
    server varnish2 10.0.2.11:80 check inter 2000 rise 2 fall 3 weight 100
    server varnish3 10.0.2.12:80 check inter 2000 rise 2 fall 3 weight 100
listen stats
    bind *:8404
    stats enable
    stats uri /
    stats refresh 10s
    # "stats admin if TRUE" let anyone who can reach this port disable or drain
    # servers. Admin actions are limited to requests from the loopback
    # interface (predefined LOCALHOST ACL); remote viewers get read-only stats.
    stats admin if LOCALHOST
Configure cache invalidation coordination with Redis
Install Redis for cache invalidation coordination between Varnish nodes.
sudo apt install -y redis-server
Create Redis configuration for cache coordination
Configure Redis to handle cache invalidation messages between cluster nodes with persistence and security.
# Network
bind 127.0.0.1 10.0.2.10
port 6379
tcp-backlog 511
# tcp-keepalive was set twice (300 here, 60 under "Performance"). The later
# value is the one that took effect, so only that one is kept.
tcp-keepalive 60

# Memory and persistence
maxmemory 512mb
maxmemory-policy allkeys-lru
save 900 1
save 300 10
save 60 10000
rdbcompression yes
rdbchecksum yes
dbfilename varnish-cache.rdb
dir /var/lib/redis

# Logging
loglevel notice
logfile /var/log/redis/redis-server.log

# Security
# Example password: replace it, and keep it in sync with the purge script.
requirepass varnish_cluster_redis_2024
rename-command FLUSHDB ""
rename-command FLUSHALL ""
rename-command DEBUG ""
rename-command CONFIG "CONFIG_b835f3d4c2a7"

# Performance
timeout 300
databases 16
Install and configure cache purge scripts
Create scripts to handle coordinated cache purging across all Varnish nodes in the cluster.
#!/bin/bash
# Varnish Cluster Cache Purge Script
# Usage: ./varnish-purge-cluster.sh [URL_PATTERN]
#
# Sends an HTTP PURGE for URL_PATTERN to every node in VARNISH_NODES, then
# publishes a JSON notification on the Redis channel "varnish:purge".
# Exit status is 0 only when every node accepted the purge.
#
# NOTE: a VCL return(purge) removes the object whose URL (and Host) match
# exactly; it does not expand wildcards. The default "/*" is kept for
# backward compatibility, but pattern-based invalidation needs a ban.
#
# Environment:
#   PURGE_HOST  optional Host header to send with the PURGE. Varnish's default
#               hash includes the Host header, so purging by node IP without
#               it targets the object cached for the IP, not for your domain.
set -euo pipefail

# Configuration
VARNISH_NODES=("10.0.2.10:80" "10.0.2.11:80" "10.0.2.12:80")
REDIS_HOST="10.0.2.10"
REDIS_PORT="6379"
REDIS_PASS="varnish_cluster_redis_2024"
LOG_FILE="/var/log/varnish-cluster-purge.log"
PURGE_HOST="${PURGE_HOST:-}"

# Functions

# Print a timestamped message to stdout and append it to LOG_FILE.
log_message() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Send a PURGE for $2 to the node $1 ("host:port").
# Returns 0 when curl reports success, 1 otherwise.
purge_varnish_node() {
    local node="$1"
    local url_pattern="$2"
    local -a curl_args=(-sf -X PURGE)

    if [[ -n "$PURGE_HOST" ]]; then
        curl_args+=(-H "Host: ${PURGE_HOST}")
    fi

    if curl "${curl_args[@]}" "http://${node}${url_pattern}" > /dev/null 2>&1; then
        log_message "Successfully purged ${url_pattern} on ${node}"
        return 0
    else
        log_message "Failed to purge ${url_pattern} on ${node}"
        return 1
    fi
}

# Publish a purge notification for $1 on the Redis channel varnish:purge.
# Returns the exit status of redis-cli.
notify_redis() {
    local url_pattern="$1"
    # Escape backslashes and double quotes so the pattern cannot break the JSON.
    local escaped="${url_pattern//\\/\\\\}"
    escaped="${escaped//\"/\\\"}"
    local message="{\"action\":\"purge\",\"pattern\":\"${escaped}\",\"timestamp\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}"

    # The password is passed via REDISCLI_AUTH rather than -a so that it does
    # not show up in the process list.
    REDISCLI_AUTH="$REDIS_PASS" redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" \
        PUBLISH varnish:purge "$message" > /dev/null 2>&1
}

# Main execution
URL_PATTERN="${1:-/*}"
log_message "Starting cluster purge for pattern: $URL_PATTERN"

# Purge all nodes
success_count=0
for node in "${VARNISH_NODES[@]}"; do
    if purge_varnish_node "$node" "$URL_PATTERN"; then
        # Not ((success_count++)): that expression evaluates to 0 on the first
        # increment, returns status 1, and aborts the script under `set -e`.
        success_count=$((success_count + 1))
    fi
done

# Notify via Redis. A Redis outage must not hide the purge result, so a
# failure here is logged instead of terminating the script.
if ! notify_redis "$URL_PATTERN"; then
    log_message "Warning: could not publish purge notification to Redis"
fi

log_message "Purge completed. Success: $success_count/${#VARNISH_NODES[@]} nodes"
if [ "$success_count" -eq "${#VARNISH_NODES[@]}" ]; then
    exit 0
else
    exit 1
fi
Set script permissions and create monitoring
Configure proper permissions for the purge script and set up basic monitoring.
sudo chmod 755 /usr/local/bin/varnish-purge-cluster.sh
sudo mkdir -p /var/log
sudo touch /var/log/varnish-cluster-purge.log
sudo chown varnish:varnish /var/log/varnish-cluster-purge.log
sudo chmod 644 /var/log/varnish-cluster-purge.log
Configure Prometheus monitoring integration
Install varnish_exporter to provide metrics for monitoring and alerting.
wget https://github.com/jonnenauha/prometheus_varnish_exporter/releases/download/1.6.1/prometheus_varnish_exporter-1.6.1.linux-amd64.tar.gz
tar xzf prometheus_varnish_exporter-1.6.1.linux-amd64.tar.gz
sudo mv prometheus_varnish_exporter-1.6.1.linux-amd64/prometheus_varnish_exporter /usr/local/bin/
sudo chmod 755 /usr/local/bin/prometheus_varnish_exporter
Create systemd service for metrics exporter
Configure the Varnish Prometheus exporter to run as a system service with proper monitoring configuration.
[Unit]
Description=Prometheus Varnish Exporter
After=network.target varnish.service
Requires=varnish.service
[Service]
Type=simple
User=varnish
Group=varnish
ExecStart=/usr/local/bin/prometheus_varnish_exporter \
-web.listen-address=:9131 \
-varnish.instance= \
-web.telemetry-path=/metrics \
-verbose
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
Configure firewall rules
Open required ports for the Varnish cluster, HAProxy, and monitoring services.
# Allow HTTP/HTTPS traffic
sudo ufw allow 80/tcp
sudo ufw allow 443/tcp

# Allow Varnish admin interface (restrict to management network)
# NOTE(review): the systemd override above starts varnishd with
# -T localhost:6082, so this rule only matters if the admin listener is
# rebound to a routable address.
sudo ufw allow from 10.0.0.0/8 to any port 6082

# Allow HAProxy stats (restrict to management network)
sudo ufw allow from 10.0.0.0/8 to any port 8404

# Allow Redis (cluster communication)
sudo ufw allow from 10.0.2.0/24 to any port 6379

# Allow Prometheus metrics
sudo ufw allow from 10.0.0.0/8 to any port 9131

# Allow inter-cluster communication
# (already covered by the unrestricted 80/tcp rule above; kept so the rule
# survives if public access to port 80 is later removed)
sudo ufw allow from 10.0.2.0/24 to any port 80
Start and enable all services
Enable and start all components of the Varnish cluster in the correct order.
# Reload systemd and start Redis
sudo systemctl daemon-reload
sudo systemctl enable --now redis-server
Start Varnish with new configuration
sudo systemctl enable --now varnish
Start HAProxy for SSL termination
sudo systemctl enable --now haproxy
Start metrics exporter
sudo systemctl enable --now varnish-exporter
Verify all services are running
sudo systemctl status redis-server varnish haproxy varnish-exporter
Configure advanced load balancing algorithms
Implement weighted round-robin with sticky sessions
Create an advanced VCL configuration that supports session persistence and weighted backend selection.
vcl 4.1;
import std;
import directors;
import cookie;
# Include main backends (web1, web2, web3) from backends.vcl
include "backends.vcl";

# Directors: weighted distribution, sticky sessions and a fallback chain
sub vcl_init {
    # Weighted distribution for new sessions.
    # directors.round_robin() accepts no weight argument, so weighting is done
    # with the random director, which picks backends in proportion to weight.
    new vdir = directors.random();
    vdir.add_backend(web1, 3.0); # 3x weight
    vdir.add_backend(web2, 2.0); # 2x weight
    vdir.add_backend(web3, 1.0); # 1x weight

    # Sticky sessions: the hash director maps a given string to the same
    # backend for as long as the set of healthy backends is unchanged.
    new sticky = directors.hash();
    sticky.add_backend(web1, 3.0);
    sticky.add_backend(web2, 2.0);
    sticky.add_backend(web3, 1.0);

    # Fallback director for when the selected backend is unhealthy
    new fallback = directors.fallback();
    fallback.add_backend(web1);
    fallback.add_backend(web2);
    fallback.add_backend(web3);
}

sub vcl_recv {
    # Session persistence based on cookie
    if (req.http.Cookie ~ "(^|;\s*)JSESSIONID=[^;]+") {
        # Anchor the pattern to the whole header so only the session ID is
        # kept. The unanchored form left the rest of the Cookie header
        # attached to the extracted value.
        set req.http.X-Session-ID = regsub(req.http.Cookie, "^(.*;\s*)?JSESSIONID=([^;]+).*$", "\2");
        # Hash the session ID to pick a stable backend
        set req.backend_hint = sticky.backend(req.http.X-Session-ID);
    } else {
        # Weighted selection for requests without a session
        set req.backend_hint = vdir.backend();
    }

    # Fallback if the selected backend is unhealthy
    if (!std.healthy(req.backend_hint)) {
        set req.backend_hint = fallback.backend();
    }

    # Geographic routing based on client IP
    if (client.ip ~ geo_eu) {
        set req.http.X-Geo-Region = "EU";
    } elsif (client.ip ~ geo_us) {
        set req.http.X-Geo-Region = "US";
    } else {
        set req.http.X-Geo-Region = "OTHER";
    }

    # Only GET and HEAD may be looked up in the cache; everything else is
    # passed straight to the backend.
    if (req.method != "GET" && req.method != "HEAD") {
        return(pass);
    }
    return(hash);
}
# Geographic ACLs
acl geo_eu {
"185.0.0.0/8"; # European IP range example
"78.0.0.0/8";
}
acl geo_us {
"192.0.0.0/8"; # US IP range example
"198.0.0.0/8";
}
Create separate backend definitions file
Organize backend configurations in a separate file for easier management.
# Backend pool definitions with enhanced health checks
backend web1 {
.host = "10.0.1.10";
.port = "8080";
.probe = {
.request = "GET /health HTTP/1.1"
"Host: localhost"
"Connection: close"
"User-Agent: Varnish-Health-Check";
.timeout = 3s;
.interval = 15s;
.window = 8;
.threshold = 6;
.initial = 4;
};
.connect_timeout = 3s;
.first_byte_timeout = 15s;
.between_bytes_timeout = 3s;
.max_connections = 500;
}
backend web2 {
.host = "10.0.1.11";
.port = "8080";
.probe = {
.request = "GET /health HTTP/1.1"
"Host: localhost"
"Connection: close"
"User-Agent: Varnish-Health-Check";
.timeout = 3s;
.interval = 15s;
.window = 8;
.threshold = 6;
.initial = 4;
};
.connect_timeout = 3s;
.first_byte_timeout = 15s;
.between_bytes_timeout = 3s;
.max_connections = 500;
}
backend web3 {
.host = "10.0.1.12";
.port = "8080";
.probe = {
.request = "GET /health HTTP/1.1"
"Host: localhost"
"Connection: close"
"User-Agent: Varnish-Health-Check";
.timeout = 3s;
.interval = 15s;
.window = 8;
.threshold = 6;
.initial = 4;
};
.connect_timeout = 3s;
.first_byte_timeout = 15s;
.between_bytes_timeout = 3s;
.max_connections = 500;
}
Implement cache warming and performance optimization
Create automated cache warming script
Build a script that preloads frequently accessed content into the cache cluster after deployments or cache purges.
#!/bin/bash
# Varnish Cache Warming Script
# Preloads critical URLs across all cluster nodes.
#
# Every URL in CRITICAL_URLS is requested from every node in VARNISH_NODES
# (up to CONCURRENT_JOBS at a time). Exit status is 0 when more than 80% of
# the requests succeeded, 1 otherwise.
set -euo pipefail

# Configuration
VARNISH_NODES=("10.0.2.10" "10.0.2.11" "10.0.2.12")
DOMAIN="example.com"
LOG_FILE="/var/log/varnish-cache-warmer.log"
CONCURRENT_JOBS=10
TIMEOUT=30

# Critical URLs to warm (add your most important pages)
CRITICAL_URLS=(
    "/"
    "/products"
    "/api/v1/catalog"
    "/static/css/main.css"
    "/static/js/app.min.js"
    "/images/logo.png"
    "/about"
    "/contact"
)

# Functions

# Print a timestamped message to stdout and append it to LOG_FILE.
log_message() {
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Request URL path $2 from node $1 with the Host header set to DOMAIN.
# Returns 0 when curl succeeded and the status code starts with 2 or 3.
warm_url() {
    local node="$1"
    local url="$2"
    local full_url="http://${node}${url}"
    local response_code
    local curl_ok=1

    # curl prints the -w status code even when it fails ("000" if there was no
    # response). Echoing another "000" on failure produced values such as
    # "000000", so the failure is tracked separately instead.
    response_code=$(curl -s -o /dev/null -w "%{http_code}" \
        --max-time "$TIMEOUT" \
        --header "Host: $DOMAIN" \
        --header "X-Cache-Warm: true" \
        --user-agent "VarnishCacheWarmer/1.0" \
        "$full_url" 2>/dev/null) || curl_ok=0
    response_code="${response_code:-000}"

    if (( curl_ok )) && [[ "$response_code" =~ ^[23] ]]; then
        log_message "✓ Warmed $url on $node (HTTP $response_code)"
        return 0
    else
        log_message "✗ Failed to warm $url on $node (HTTP $response_code)"
        return 1
    fi
}

# Export functions and settings for the bash processes started by xargs
export -f warm_url log_message
export DOMAIN TIMEOUT LOG_FILE

# Main execution
log_message "Starting cache warming for ${#VARNISH_NODES[@]} nodes"
log_message "URLs to warm: ${#CRITICAL_URLS[@]}"
start_time=$(date +%s)
total_requests=0
successful_requests=0

# Warm each URL on each node in parallel.
# - The loop reads from a process substitution rather than the end of a
#   pipeline, so the counters are updated in this shell and not in a subshell
#   whose variables are discarded.
# - warm_url's log output is sent to stderr so that only the "success" /
#   "failure" markers reach the counting loop.
# - Counters use var=$((var + 1)); ((var++)) returns status 1 when the old
#   value is 0, which aborts the script under `set -e`.
while read -r result; do
    case "$result" in
        success) successful_requests=$((successful_requests + 1)) ;;
        failure) ;;
        *) continue ;;
    esac
    total_requests=$((total_requests + 1))
done < <(
    for node in "${VARNISH_NODES[@]}"; do
        for url in "${CRITICAL_URLS[@]}"; do
            echo "$node $url"
        done
    done | xargs -n 2 -P "$CONCURRENT_JOBS" bash -c \
        'if warm_url "$0" "$1" >&2; then echo "success"; else echo "failure"; fi'
)

end_time=$(date +%s)
duration=$((end_time - start_time))
log_message "Cache warming completed in ${duration}s"
log_message "Success rate: $successful_requests/$total_requests requests"

if [ "$successful_requests" -gt $((total_requests * 80 / 100)) ]; then
    log_message "Cache warming successful (>80% success rate)"
    exit 0
else
    log_message "Cache warming failed (<80% success rate)"
    exit 1
fi
Configure performance tuning parameters
Create optimized Varnish parameters for high-performance caching with memory and connection tuning.
# Cache-status reporting subroutines
#
# resp.* only exists in vcl_deliver and vcl_synth, so it cannot be set from
# vcl_hit / vcl_miss / vcl_pass / vcl_pipe (the VCL compiler rejects it). The
# status is recorded on the request here and copied to the response in the
# vcl_deliver fragment at the end.
#
# Placement: VCL concatenates subroutines with the same name in file order.
# Put this vcl_deliver fragment BEFORE any vcl_deliver that ends in
# return(deliver), or merge its single statement into that subroutine.

sub vcl_hit {
    # vcl_hit is only entered for objects that may still be delivered: either
    # within their TTL, or past it but inside the grace window.
    if (obj.ttl >= 0s) {
        # Fresh cache hit
        set req.http.X-Cache-Status = "HIT-FRESH";
    } else {
        # Stale cache hit within grace period
        set req.http.X-Cache-Status = "HIT-STALE";
    }
    # return(miss) is not a valid vcl_hit action in Varnish 7.
    return(deliver);
}

sub vcl_miss {
    set req.http.X-Cache-Status = "MISS";
    return(fetch);
}

sub vcl_pass {
    # Uncacheable requests
    set req.http.X-Cache-Status = "PASS";
    return(fetch);
}

sub vcl_pipe {
    # Streaming/websocket connections: Varnish only shuffles bytes after this
    # point and vcl_deliver is not run, so there is no response to annotate.
    return(pipe);
}

sub vcl_backend_fetch {
    # The marker is internal bookkeeping; do not send it to the backend.
    unset bereq.http.X-Cache-Status;
}

sub vcl_deliver {
    if (req.http.X-Cache-Status) {
        set resp.http.X-Cache-Status = req.http.X-Cache-Status;
    }
}
# Advanced backend response handling
sub vcl_backend_response {
    # Error responses are never given a forced TTL. Mark them uncacheable
    # (hit-for-miss) for a short time so later requests go to the backend
    # instead of queueing behind one fetch.
    if (beresp.status >= 400) {
        set beresp.ttl = 120s;
        set beresp.uncacheable = true;
        return(deliver);
    }

    # Implement ESI for dynamic content
    if (beresp.http.Content-Type ~ "text/html") {
        set beresp.do_esi = true;
    }

    # Compress responses to save bandwidth and storage
    if (beresp.http.content-type ~ "^(text/|application/json|application/javascript|application/xml)") {
        set beresp.do_gzip = true;
    }

    # Set TTL based on content patterns
    if (bereq.url ~ "\.(css|js|jpg|png|gif|ico|svg)$") {
        # Static assets - long TTL. A cookie set on a static file is never
        # user specific, so drop it rather than let it block caching.
        unset beresp.http.Set-Cookie;
        set beresp.ttl = 7d;
        set beresp.grace = 24h;
        set beresp.keep = 48h;
    } elsif (beresp.http.Set-Cookie ||
             beresp.http.Cache-Control ~ "(?i)(private|no-store|no-cache)") {
        # The backend marked this response as personal or non-storable.
        # Forcing a TTL on it would serve one user's response to others.
        set beresp.ttl = 120s;
        set beresp.uncacheable = true;
        return(deliver);
    } elsif (bereq.url ~ "^/api/") {
        # API responses - short TTL with grace
        set beresp.ttl = 5m;
        set beresp.grace = 1h;
        set beresp.keep = 2h;
    } else {
        # HTML and other content
        set beresp.ttl = 1h;
        set beresp.grace = 6h;
        set beresp.keep = 12h;
    }
    return(deliver);
}
Set up cache warming automation
Create a systemd timer to automatically warm the cache after purges and at regular intervals.
sudo chmod 755 /usr/local/bin/varnish-cache-warmer.sh
sudo mkdir -p /var/log
sudo touch /var/log/varnish-cache-warmer.log
sudo chown varnish:varnish /var/log/varnish-cache-warmer.log
Create systemd timer for cache warming
Configure automated cache warming to run after deployments and during off-peak hours.
[Unit]
Description=Varnish Cache Warming Timer
# No Requires= on the service here: it would start the warming job every time
# the timer unit itself is started (e.g. at boot), not only on schedule.

[Timer]
# Run daily at 02:00 and 14:00. The calendar expression needs all three date
# fields (*-*-*); the "--*" form is not a valid systemd calendar spec.
OnCalendar=*-*-* 02:00:00
OnCalendar=*-*-* 14:00:00
Persistent=true
RandomizedDelaySec=300
Unit=varnish-cache-warmer.service

[Install]
WantedBy=timers.target
Create systemd service for cache warming
Define the cache warming service with proper error handling and logging.
[Unit]
Description=Varnish Cache Warming Service
After=varnish.service
Requires=varnish.service
[Service]
Type=oneshot
User=varnish
Group=varnish
ExecStart=/usr/local/bin/varnish-cache-warmer.sh
StandardOutput=journal
StandardError=journal
TimeoutSec=600
Enable cache warming automation
Activate the systemd timer for automated cache warming.
sudo systemctl daemon-reload
sudo systemctl enable varnish-cache-warmer.timer
sudo systemctl start varnish-cache-warmer.timer
sudo systemctl status varnish-cache-warmer.timer
Verify your setup
Test the Varnish cluster configuration and verify all components are working correctly.
# Check all services are running
sudo systemctl status varnish haproxy redis-server varnish-exporter
Test cache functionality
curl -I http://localhost/
curl -I http://localhost/ # Second request should show X-Cache: HIT
Check backend health
varnishadm backend.list
varnishadm backend.list -p
Test SSL termination
curl -I https://your-domain.com/
Verify cluster purging
sudo /usr/local/bin/varnish-purge-cluster.sh /test-page
Check metrics endpoint
curl http://localhost:9131/metrics | grep varnish
Test cache warming
sudo /usr/local/bin/varnish-cache-warmer.sh
Monitor HAProxy stats
curl http://localhost:8404/
Check Redis connectivity
redis-cli -h 10.0.2.10 -p 6379 -a varnish_cluster_redis_2024 ping
For more comprehensive monitoring, consider setting up Prometheus and Grafana dashboards to track cache hit rates, response times, and backend health across your cluster.
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| 503 Service Unavailable | All backends are down or unhealthy | Check backend services: varnishadm backend.list -p |
| Low cache hit ratio | TTL too short or cookies preventing caching | Review VCL TTL settings and cookie handling rules |
| SSL errors from HAProxy | Certificate file permissions or path issues | Verify certificate: sudo ls -la /etc/ssl/certs/example.com.pem |
| Cache not purging on all nodes | Network connectivity or firewall blocking | Test connectivity: curl -X PURGE http://10.0.2.11/test |
| Redis connection errors | Authentication or network configuration | Test Redis: redis-cli -h HOST -p 6379 -a PASSWORD ping |
| Varnish crashes under load | Memory exhaustion or thread pool limits | Increase memory: -s malloc,8G and threads: -p thread_pool_max=10000 |
| Metrics not appearing | Prometheus exporter not connecting to Varnish | Check exporter logs: sudo journalctl -u varnish-exporter -f |
Never use chmod 777 on Varnish configuration files or cache directories. This creates security vulnerabilities. Instead, use chown varnish:varnish and appropriate permissions like 644 for config files and 755 for directories.
Next steps
- Set up comprehensive monitoring with Prometheus and Grafana to track cluster performance
- Implement advanced DDoS protection with HAProxy rate limiting and security rules
- Add high availability with keepalived for automatic HAProxy failover
- Integrate with CDN services for global content distribution
- Set up centralized log analysis with Elasticsearch and Kibana
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
set -euo pipefail
# Varnish 7 Cluster Installation Script
# Configures Varnish Cache 7 with HAProxy SSL termination and load balancing
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Default configuration
VARNISH_PORT="6081"
HAPROXY_STATS_PORT="8404"
BACKEND_SERVERS="${BACKEND_SERVERS:-127.0.0.1:8080}"
DOMAIN_NAME="${1:-example.com}"
usage() {
echo "Usage: $0 [domain_name] [backend_servers]"
echo "Example: $0 mysite.com '192.168.1.10:8080,192.168.1.11:8080'"
echo "Backend servers: comma-separated list of IP:PORT"
exit 1
}
log() {
echo -e "${GREEN}[INFO]${NC} $1"
}
warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
error() {
echo -e "${RED}[ERROR]${NC} $1"
exit 1
}
cleanup() {
warn "Installation failed. Cleaning up..."
systemctl stop varnish haproxy 2>/dev/null || true
}
trap cleanup ERR
# Check prerequisites
[[ $EUID -eq 0 ]] || error "This script must be run as root"

# Detect distribution
if [ ! -f /etc/os-release ]; then
    error "Cannot detect distribution. /etc/os-release not found."
fi
. /etc/os-release

# Select package-manager commands for the detected distribution.
# The PKG_* strings are executed unquoted later in the script (e.g.
# `$PKG_UPDATE`), which word-splits them but does NOT interpret shell
# operators. Each value must therefore be one simple command: the former
# "dnf check-update || true" passed "||" and "true" to dnf as arguments, and
# check-update exits non-zero when updates exist, which aborts under `set -e`.
# "makecache" refreshes metadata and exits 0 on success.
case "$ID" in
    ubuntu|debian)
        PKG_MGR="apt"
        PKG_UPDATE="apt update"
        PKG_INSTALL="apt install -y"
        PKG_UPGRADE="apt upgrade -y"
        RELEASE_CMD="lsb_release -cs"
        ;;
    almalinux|rocky|centos|rhel|ol|fedora)
        PKG_MGR="dnf"
        PKG_UPDATE="dnf makecache"
        PKG_INSTALL="dnf install -y"
        PKG_UPGRADE="dnf update -y"
        RELEASE_VAR="\$releasever"
        ;;
    amzn)
        PKG_MGR="yum"
        PKG_UPDATE="yum makecache"
        PKG_INSTALL="yum install -y"
        PKG_UPGRADE="yum update -y"
        RELEASE_VAR="\$releasever"
        ;;
    *)
        error "Unsupported distribution: $ID"
        ;;
esac
# Parse backend servers
if [[ $# -ge 2 ]]; then
BACKEND_SERVERS="$2"
fi
log "[1/8] Updating system packages..."
$PKG_UPDATE
$PKG_UPGRADE
log "[2/8] Installing prerequisites..."
if [[ "$PKG_MGR" == "apt" ]]; then
$PKG_INSTALL curl gnupg2 software-properties-common lsb-release
else
$PKG_INSTALL curl gnupg2 epel-release
fi
log "[3/8] Installing Varnish Cache 7..."
if [[ "$PKG_MGR" == "apt" ]]; then
    # --batch --yes lets gpg overwrite an existing keyring, so the script can
    # be re-run without stopping at an overwrite prompt.
    curl -fsSL https://packagecloud.io/varnishcache/varnish70/gpgkey | gpg --batch --yes --dearmor -o /usr/share/keyrings/varnish.gpg
    # Use the detected distribution ($ID is "ubuntu" or "debian" here) in the
    # repository path; a hard-coded "ubuntu" pointed Debian hosts at the
    # wrong package tree.
    echo "deb [signed-by=/usr/share/keyrings/varnish.gpg] https://packagecloud.io/varnishcache/varnish70/${ID}/ $(lsb_release -cs) main" > /etc/apt/sources.list.d/varnish.list
    $PKG_UPDATE
    $PKG_INSTALL varnish varnish-dev
else
    rpm --import https://packagecloud.io/varnishcache/varnish70/gpgkey
    # Quoted heredoc: $releasever and $basearch are written literally and
    # expanded by dnf/yum, not by this script.
    # NOTE(review): the "el" path is used for every RPM-based ID accepted
    # above, including fedora and amzn; confirm the repository provides
    # packages for those before relying on it.
    cat > /etc/yum.repos.d/varnish.repo << 'EOF'
[varnish70]
name=Varnish Cache 7.0
baseurl=https://packagecloud.io/varnishcache/varnish70/el/$releasever/$basearch
enabled=1
gpgcheck=1
gpgkey=https://packagecloud.io/varnishcache/varnish70/gpgkey
EOF
    $PKG_INSTALL varnish varnish-devel
fi
log "[4/8] Configuring Varnish systemd service..."
mkdir -p /etc/systemd/system/varnish.service.d
cat > /etc/systemd/system/varnish.service.d/override.conf << EOF
[Service]
ExecStart=
ExecStart=/usr/sbin/varnishd -a :${VARNISH_PORT} -a :6080 -f /etc/varnish/default.vcl -s malloc,2G -p feature=+esi_ignore_https -p feature=+esi_disable_xml_check
EOF
log "[5/8] Creating Varnish VCL configuration..."
cat > /etc/varnish/default.vcl << 'EOF'
vcl 4.1;
import directors;
import std;
backend web1 {
.host = "BACKEND_PLACEHOLDER";
.port = "PORT_PLACEHOLDER";
.probe = {
.url = "/health";
.timeout = 5s;
.interval = 10s;
.window = 5;
.threshold = 3;
}
}
sub vcl_init {
new cluster = directors.round_robin();
cluster.add_backend(web1);
}
# Hosts allowed to send PURGE requests directly to Varnish.
acl purgers {
    "127.0.0.1";
    "::1";
}

sub vcl_recv {
    set req.backend_hint = cluster.backend();

    if (req.method == "PURGE") {
        # HAProxy runs on this host, so client.ip is the loopback address for
        # proxied Internet traffic as well. Varnish appends client.ip to
        # X-Forwarded-For before vcl_recv, so a request that came through the
        # proxy carries more than one address there, while a direct local
        # request carries exactly one. Only the latter may purge.
        if (client.ip !~ purgers || req.http.X-Forwarded-For ~ ",") {
            return (synth(405, "Purging not allowed"));
        }
        return (purge);
    }

    if (req.method != "GET" && req.method != "HEAD" && req.method != "PUT" && req.method != "POST" && req.method != "TRACE" && req.method != "OPTIONS" && req.method != "DELETE") {
        return (pipe);
    }

    # Only GET and HEAD may be looked up in the cache. Letting POST/PUT/DELETE
    # fall through would strip their cookies and turn them into cache lookups.
    if (req.method != "GET" && req.method != "HEAD") {
        return (pass);
    }

    if (req.http.Authorization || req.http.Cookie ~ "SESS|session") {
        return (pass);
    }

    unset req.http.Cookie;
    return (hash);
}
sub vcl_backend_response {
if (beresp.status >= 400) {
set beresp.ttl = 0s;
set beresp.grace = 15s;
return(deliver);
}
set beresp.grace = 1h;
unset beresp.http.Server;
unset beresp.http.X-Powered-By;
return(deliver);
}
sub vcl_deliver {
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
} else {
set resp.http.X-Cache = "MISS";
}
set resp.http.X-Served-By = server.hostname;
set resp.http.X-Frame-Options = "SAMEORIGIN";
set resp.http.X-Content-Type-Options = "nosniff";
set resp.http.X-XSS-Protection = "1; mode=block";
unset resp.http.X-Varnish;
unset resp.http.Via;
unset resp.http.Age;
return(deliver);
}
EOF
# Update VCL with first backend server
FIRST_BACKEND=$(echo "$BACKEND_SERVERS" | cut -d',' -f1)
BACKEND_IP=$(echo "$FIRST_BACKEND" | cut -d':' -f1)
BACKEND_PORT=$(echo "$FIRST_BACKEND" | cut -d':' -f2)
sed -i "s/BACKEND_PLACEHOLDER/$BACKEND_IP/g" /etc/varnish/default.vcl
sed -i "s/PORT_PLACEHOLDER/$BACKEND_PORT/g" /etc/varnish/default.vcl
log "[6/8] Installing and configuring HAProxy..."
$PKG_INSTALL haproxy
cat > /etc/haproxy/haproxy.cfg << EOF
global
log 127.0.0.1:514 local0
stats socket /run/haproxy/admin.sock mode 660 level admin
stats timeout 30s
user haproxy
group haproxy
daemon
ssl-default-bind-ciphers ECDHE+AESGCM:ECDHE+CHACHA20:RSA+AESGCM:RSA+AES:!aNULL:!MD5:!DSS
ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets
tune.ssl.default-dh-param 2048
tune.bufsize 32768
defaults
mode http
log global
option httplog
option dontlognull
option forwardfor
option http-server-close
timeout connect 5000ms
timeout client 50000ms
timeout server 50000ms
frontend web_frontend
bind *:80
bind *:443 ssl crt /etc/ssl/certs/${DOMAIN_NAME}.pem
redirect scheme https if !{ ssl_fc }
default_backend varnish_backend
backend varnish_backend
balance roundrobin
server varnish1 127.0.0.1:${VARNISH_PORT} check
listen stats
    bind *:${HAPROXY_STATS_PORT}
    stats enable
    stats uri /stats
    stats refresh 30s
    # This port is opened in the firewall by this script, so admin actions
    # (disable/drain servers) are limited to requests from the loopback
    # interface; remote viewers get read-only statistics.
    stats admin if LOCALHOST
EOF
log "[7/8] Setting up SSL certificate (self-signed for testing)..."
mkdir -p /etc/ssl/certs
openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
-keyout /etc/ssl/certs/${DOMAIN_NAME}.key \
-out /etc/ssl/certs/${DOMAIN_NAME}.crt \
-subj "/C=US/ST=State/L=City/O=Organization/CN=${DOMAIN_NAME}" 2>/dev/null
cat /etc/ssl/certs/${DOMAIN_NAME}.crt /etc/ssl/certs/${DOMAIN_NAME}.key > /etc/ssl/certs/${DOMAIN_NAME}.pem
chmod 600 /etc/ssl/certs/${DOMAIN_NAME}.*
# Configure firewall
if command -v ufw >/dev/null 2>&1; then
    # Add the rules BEFORE enabling ufw, and allow SSH first. Enabling a
    # firewall whose default policy denies incoming traffic without an SSH
    # rule can cut off the session running this script.
    ufw allow ssh
    ufw allow 80/tcp
    ufw allow 443/tcp
    ufw allow ${HAPROXY_STATS_PORT}/tcp
    ufw --force enable
elif command -v firewall-cmd >/dev/null 2>&1; then
    systemctl start firewalld
    systemctl enable firewalld
    firewall-cmd --permanent --add-service=ssh
    firewall-cmd --permanent --add-service=http
    firewall-cmd --permanent --add-service=https
    firewall-cmd --permanent --add-port=${HAPROXY_STATS_PORT}/tcp
    firewall-cmd --reload
fi
log "[8/8] Starting services..."
systemctl daemon-reload
systemctl enable varnish haproxy
systemctl start varnish
systemctl start haproxy
# Verification
log "Verifying installation..."
sleep 5
if systemctl is-active --quiet varnish; then
log "✓ Varnish is running"
else
error "✗ Varnish failed to start"
fi
if systemctl is-active --quiet haproxy; then
log "✓ HAProxy is running"
else
error "✗ HAProxy failed to start"
fi
log "Installation completed successfully!"
log "Access your site: https://${DOMAIN_NAME}"
log "HAProxy stats: http://$(hostname -I | awk '{print $1}'):${HAPROXY_STATS_PORT}/stats"
log "Backend servers configured: ${BACKEND_SERVERS}"
warn "Remember to replace the self-signed certificate with a proper SSL certificate for production use"
Review the script before running. Execute with: bash install.sh