Set up Consul WAN federation to replicate services and configuration across multiple datacenters with ACL token replication, health monitoring, and automatic failover capabilities.
Prerequisites
- Multiple servers in different locations
- Root or sudo access
- Network connectivity between datacenters
- Basic understanding of Consul architecture
What this solves
Consul WAN federation connects multiple Consul datacenters for service discovery, configuration replication, and cross-datacenter communication. This setup provides geographic redundancy, disaster recovery capabilities, and centralized service mesh management across distributed infrastructure.
Step-by-step configuration
Install Consul on all datacenter nodes
Install Consul on each node that will participate in the WAN federation. We'll use the official HashiCorp repository for consistent versions.
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
sudo apt update && sudo apt install -y consul
Create Consul user and directories
Set up the required user account and directory structure for Consul to run securely.
# Create the consul system account only when it is missing: useradd fails on
# an existing user (the package installation may already have created it).
id consul &>/dev/null || sudo useradd --system --home /etc/consul --shell /bin/false consul
# Scratch, configuration and data directories, all owned by the service account.
sudo mkdir -p /opt/consul /etc/consul.d /var/lib/consul
sudo chown -R consul:consul /opt/consul /etc/consul.d /var/lib/consul
sudo chmod 755 /opt/consul /etc/consul.d /var/lib/consul
Generate encryption keys and certificates
Create the gossip encryption key and generate TLS certificates for secure communication between datacenters.
consul keygen
Save this encryption key for use in all datacenter configurations. Next, generate TLS certificates:
# Create the certificate authority once, then one server certificate per datacenter.
# NOTE(review): the configurations below reference these files under
# /etc/consul.d/ - run the commands there or copy the resulting .pem files
# (CA certificate plus the matching server certificate/key) onto each server.
consul tls ca create
consul tls cert create -server -dc dc1
consul tls cert create -server -dc dc2
Configure primary datacenter (DC1)
Create the Consul configuration for the primary datacenter that will be the source of truth for ACL replication.
# Primary datacenter (dc1): the source of truth for ACL replication.
datacenter = "dc1"
data_dir = "/var/lib/consul"
log_level = "INFO"
node_name = "consul-dc1-01"
bind_addr = "203.0.113.10"
client_addr = "0.0.0.0"
server = true
bootstrap_expect = 3
ui_config {
  enabled = true
}
connect {
  enabled = true
}
# Gossip key from `consul keygen`; use the same value in every datacenter.
encrypt = "your-gossip-encryption-key-here"
tls {
  defaults {
    ca_file = "/etc/consul.d/consul-agent-ca.pem"
    cert_file = "/etc/consul.d/dc1-server-consul-0.pem"
    key_file = "/etc/consul.d/dc1-server-consul-0-key.pem"
    verify_incoming = true
    verify_outgoing = true
  }
  # The HTTPS API is queried later in this guide without a client certificate
  # (curl and the HTTP health check), so it must not demand one. ACL tokens
  # still gate every request.
  https {
    verify_incoming = false
  }
  internal_rpc {
    verify_server_hostname = true
  }
}
acl = {
  enabled = true
  default_policy = "deny"
  enable_token_persistence = true
  tokens = {
    initial_management = "your-bootstrap-token-here"
  }
}
# Other dc1 servers (LAN) and the dc2 servers (WAN).
retry_join = ["203.0.113.11", "203.0.113.12"]
retry_join_wan = ["203.0.113.20", "203.0.113.21", "203.0.113.22"]
ports {
  # HTTPS stays disabled unless a port is set; the firewall rules and the
  # verification steps of this guide use 8501.
  https = 8501
  grpc = 8502
  grpc_tls = 8503
}
performance {
  raft_multiplier = 1
}
Configure secondary datacenter (DC2)
Configure the secondary datacenter to replicate from the primary and participate in WAN federation.
# Secondary datacenter (dc2): replicates ACL data from the primary, dc1.
datacenter = "dc2"
data_dir = "/var/lib/consul"
log_level = "INFO"
node_name = "consul-dc2-01"
bind_addr = "203.0.113.20"
client_addr = "0.0.0.0"
server = true
bootstrap_expect = 3
ui_config {
  enabled = true
}
connect {
  enabled = true
}
# Gossip key from `consul keygen`; must be the same value used in dc1.
encrypt = "your-gossip-encryption-key-here"
tls {
  defaults {
    ca_file = "/etc/consul.d/consul-agent-ca.pem"
    cert_file = "/etc/consul.d/dc2-server-consul-0.pem"
    key_file = "/etc/consul.d/dc2-server-consul-0-key.pem"
    verify_incoming = true
    verify_outgoing = true
  }
  # The HTTPS API is queried later in this guide without a client certificate
  # (curl and the HTTP health check), so it must not demand one. ACL tokens
  # still gate every request.
  https {
    verify_incoming = false
  }
  internal_rpc {
    verify_server_hostname = true
  }
}
acl = {
  enabled = true
  default_policy = "deny"
  enable_token_persistence = true
  enable_token_replication = true
  tokens = {
    # Token created on the primary with the "replication" policy.
    replication = "your-replication-token-here"
  }
}
primary_datacenter = "dc1"
# Other dc2 servers (LAN) and the dc1 servers (WAN).
retry_join = ["203.0.113.21", "203.0.113.22"]
retry_join_wan = ["203.0.113.10", "203.0.113.11", "203.0.113.12"]
ports {
  # HTTPS stays disabled unless a port is set; the firewall rules and the
  # verification steps of this guide use 8501.
  https = 8501
  grpc = 8502
  grpc_tls = 8503
}
performance {
  raft_multiplier = 1
}
Set up systemd service files
Create systemd unit files to manage Consul as a system service with proper resource limits.
# systemd unit that runs the Consul agent as the unprivileged consul user.
[Unit]
Description=Consul
Documentation=https://www.consul.io/
# Start only once the network is fully up.
Requires=network-online.target
After=network-online.target
# Refuse to start when the main configuration file is missing or empty.
ConditionFileNotEmpty=/etc/consul.d/consul.hcl
[Service]
# The service is considered started once the agent sends its readiness notification.
Type=notify
User=consul
Group=consul
# Every configuration file found in /etc/consul.d/ is loaded.
ExecStart=/usr/bin/consul agent -config-dir=/etc/consul.d/
# Reload by sending SIGHUP to the main agent process.
ExecReload=/bin/kill -HUP $MAINPID
# On stop, signal only the main process rather than the whole control group.
KillMode=process
Restart=on-failure
# Raised open-file limit for the agent process.
LimitNOFILE=65536
TimeoutStopSec=30
[Install]
WantedBy=multi-user.target
Configure firewall rules
Open the required ports for Consul communication between datacenters.
# Allow every Consul port through ufw. Each entry is "<port>/<proto>|<comment>".
consul_rules=(
  '8300/tcp|Consul server RPC'
  '8301/tcp|Consul serf LAN'
  '8301/udp|Consul serf LAN'
  '8302/tcp|Consul serf WAN'
  '8302/udp|Consul serf WAN'
  '8500/tcp|Consul HTTP API'
  '8501/tcp|Consul HTTPS API'
  '8502/tcp|Consul gRPC'
  '8503/tcp|Consul gRPC TLS'
)
for rule in "${consul_rules[@]}"; do
  # Text before the "|" is the port spec, text after it is the rule comment.
  sudo ufw allow "${rule%%|*}" comment "${rule#*|}"
done
sudo ufw reload
Start Consul services
Enable and start Consul on all nodes, beginning with the servers in the primary datacenter.
sudo systemctl daemon-reload
sudo systemctl enable consul
sudo systemctl start consul
sudo systemctl status consul
Bootstrap ACL system
Initialize the ACL system on the primary datacenter and create replication tokens.
consul acl bootstrap
Save the bootstrap token and create a replication token for the secondary datacenter:
export CONSUL_HTTP_TOKEN="your-bootstrap-token-here"
consul acl policy create -name "replication" -rules 'acl = "write" operator = "write" service_prefix "" { policy = "read" intentions = "read" } node_prefix "" { policy = "write" } namespace_prefix "" { policy = "read" }'
consul acl token create -description "ACL Token Replication" -policy-name "replication"
Configure ACL token replication
Set up automatic ACL token replication from primary to secondary datacenter.
# The consul CLI has no "acl replication-token" subcommand; a replication token
# is an ordinary ACL token that carries the "replication" policy created above.
# Note the SecretID in the output - it is the value for acl.tokens.replication.
consul acl token create -description "DC2 Replication Token" -policy-name "replication"
Update the secondary datacenter configuration to include the replication token and restart Consul:
sudo systemctl restart consul
Join datacenters via WAN
Connect the datacenters using WAN federation to enable cross-datacenter service discovery.
consul join -wan 203.0.113.20
Verify the WAN federation status:
consul members -wan
Configure health monitoring
Set up cross-datacenter health checks and monitoring for service failover capabilities.
{
  "checks": [
    {
      "id": "wan-connectivity",
      "name": "WAN Connectivity Check",
      "args": ["sh", "-c", "consul members -wan | grep -q alive"],
      "interval": "30s",
      "timeout": "10s"
    },
    {
      "id": "acl-replication",
      "name": "ACL Replication Status",
      "http": "https://localhost:8501/v1/acl/replication",
      "header": {"X-Consul-Token": ["your-token-here"]},
      "tls_skip_verify": false,
      "interval": "60s",
      "timeout": "10s"
    }
  ]
}
Configure automatic failover
Set up prepared queries for automatic service failover between datacenters.
# Prepared queries are managed through the HTTP API (/v1/query); the consul CLI
# has no prepared-query subcommand. The query resolves "web" locally and fails
# over to dc2 when no healthy instance is available.
curl --request POST \
  --header "X-Consul-Token: your-token-here" \
  --data '{"Name": "web-failover", "Service": {"Service": "web", "Failover": {"Datacenters": ["dc2"]}}}' \
  http://127.0.0.1:8500/v1/query
Create a sample service registration for testing:
{
"service": {
"name": "web",
"tags": ["v1"],
"port": 80,
"check": {
"http": "http://localhost:80/health",
"interval": "10s"
}
}
}
Verify your setup
Check that WAN federation is working correctly and services are replicating across datacenters:
# Servers of every federated datacenter should be listed as "alive".
consul members -wan
consul catalog services
# ACL replication status is exposed by the HTTP API (run this on a dc2 server);
# the consul CLI has no "acl replication-status" subcommand.
curl --silent --header "X-Consul-Token: $CONSUL_HTTP_TOKEN" "http://127.0.0.1:8500/v1/acl/replication?pretty"
consul operator raft list-peers
Test cross-datacenter service discovery:
dig @127.0.0.1 -p 8600 web.service.dc2.consul
consul catalog services -datacenter=dc2
Monitor health checks and replication status:
consul monitor -log-level=INFO
curl -k https://127.0.0.1:8501/v1/health/state/any
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| WAN join fails | Firewall blocking ports | Ensure ports 8302 TCP/UDP are open between datacenters |
| ACL replication not working | Missing or invalid replication token | Check token permissions with consul acl token read -id TOKEN |
| Service discovery fails across DC | DNS configuration incorrect | Verify DNS forwarding to port 8600 and check service registration |
| TLS certificate errors | Hostname verification failing | Ensure certificates match server hostnames and CA is properly distributed |
| Raft leader election issues | Network partitions or clock drift | Check NTP synchronization and network connectivity between nodes |
| High memory usage | Large number of services/nodes | Tune performance.raft_multiplier and enable metrics monitoring |
Next steps
- Monitor Consul with Prometheus and Grafana for comprehensive observability
- Configure Consul Connect service mesh with Envoy for secure microservices communication
- Implement Consul backup and disaster recovery for production resilience
- Configure Consul mesh gateways for cross-datacenter communication
- Set up Consul intentions and service segmentation
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Installs and configures a Consul server agent on this host.
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -euo pipefail
# Colors for output
# ANSI escape sequences stored as literal backslash text; they take effect
# when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Usage function
#######################################
# Print the command-line synopsis with two examples, then terminate.
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; exits with status 1
#######################################
usage() {
  cat <<EOF
Usage: $0 <datacenter_name> <node_ip> <node_name> [primary_dc_ip]
 datacenter_name: Name of the datacenter (e.g., dc1, dc2)
 node_ip: IP address of this Consul node
 node_name: Name of this Consul node
 primary_dc_ip: IP of primary datacenter (required for secondary DCs)

Example:
 $0 dc1 203.0.113.10 consul-dc1-01
 $0 dc2 203.0.113.20 consul-dc2-01 203.0.113.10
EOF
  exit 1
}
# Check arguments
# Three positional arguments are mandatory and a fourth is optional; any other
# count prints the usage text (which exits).
if (( $# < 3 || $# > 4 )); then
  usage
fi
DATACENTER=$1
NODE_IP=$2
NODE_NAME=$3
# Stays empty when no fourth argument is given.
PRIMARY_DC_IP=${4:-}
# Check if running as root
if (( EUID != 0 )); then
  echo -e "${RED}Error: This script must be run as root${NC}"
  exit 1
fi
# Detect distribution
#######################################
# Identify the host distribution and select the package and firewall tooling.
# Globals:   sets PKG_MGR, PKG_UPDATE, PKG_INSTALL, FIREWALL_CMD; the variables
#            of the os-release file (ID, ...) are sourced into the global scope
# Arguments: $1 - os-release file to read (default: /etc/os-release)
# Returns:   exits 1 when the file is missing or the distribution is unsupported
#######################################
detect_distro() {
  local os_release="${1:-/etc/os-release}"
  if [ ! -f "$os_release" ]; then
    echo -e "${RED}Cannot detect distribution${NC}"
    exit 1
  fi
  # shellcheck source=/dev/null
  . "$os_release"
  # PKG_UPDATE and PKG_INSTALL are later expanded unquoted as "command plus
  # arguments", so they must contain plain words only: shell operators such as
  # "|| true" inside the string would be handed to the package manager as
  # literal arguments. "makecache" refreshes the metadata and exits 0.
  case "${ID:-}" in
    ubuntu|debian)
      PKG_MGR="apt"
      PKG_UPDATE="apt update"
      PKG_INSTALL="apt install -y"
      FIREWALL_CMD="ufw"
      ;;
    almalinux|rocky|centos|rhel|ol|fedora)
      PKG_MGR="dnf"
      PKG_UPDATE="dnf makecache"
      PKG_INSTALL="dnf install -y"
      FIREWALL_CMD="firewall-cmd"
      ;;
    amzn)
      PKG_MGR="yum"
      PKG_UPDATE="yum makecache"
      PKG_INSTALL="yum install -y"
      FIREWALL_CMD="firewall-cmd"
      ;;
    *)
      echo -e "${RED}Unsupported distribution: ${ID:-unknown}${NC}"
      exit 1
      ;;
  esac
}
detect_distro
# Cleanup function
# Record what already exists before the installer changes anything, so that a
# failed run removes only what this run created and never destroys the data,
# configuration or account of an earlier Consul installation.
CLEANUP_PATHS=()
for consul_path in /opt/consul /etc/consul.d /var/lib/consul; do
  [ -e "$consul_path" ] || CLEANUP_PATHS+=("$consul_path")
done
if id consul &>/dev/null; then
  CLEANUP_REMOVE_USER=false
else
  CLEANUP_REMOVE_USER=true
fi
#######################################
# Roll back a failed installation (installed as the ERR trap handler).
# Globals:   CLEANUP_PATHS (read), CLEANUP_REMOVE_USER (read)
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
cleanup() {
  echo -e "${RED}Installation failed. Cleaning up...${NC}"
  # Best effort: the service may not exist yet at the point of failure.
  systemctl stop consul 2>/dev/null || true
  systemctl disable consul 2>/dev/null || true
  if [ "$CLEANUP_REMOVE_USER" = true ]; then
    userdel consul 2>/dev/null || true
  fi
  if [ "${#CLEANUP_PATHS[@]}" -gt 0 ]; then
    rm -rf -- "${CLEANUP_PATHS[@]}"
  fi
  echo -e "${YELLOW}Cleanup completed${NC}"
}
trap cleanup ERR
echo -e "${GREEN}Starting Consul multi-datacenter installation for $DATACENTER${NC}"
# Step 1: Install prerequisites
echo -e "${GREEN}[1/10] Installing prerequisites...${NC}"
# PKG_UPDATE and PKG_INSTALL hold a command plus its arguments, so they are
# expanded unquoted on purpose.
# shellcheck disable=SC2086
$PKG_UPDATE
if [ "$PKG_MGR" = "apt" ]; then
  # shellcheck disable=SC2086
  $PKG_INSTALL curl wget gnupg lsb-release
  # --batch --yes lets a re-run overwrite an existing keyring instead of
  # stopping at gpg's overwrite confirmation.
  curl -fsSL https://apt.releases.hashicorp.com/gpg \
    | gpg --batch --yes --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
  echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" > /etc/apt/sources.list.d/hashicorp.list
  apt update
else
  # HashiCorp publishes a separate repository definition per distribution family.
  case "${ID:-}" in
    fedora) HASHICORP_REPO_FAMILY="fedora" ;;
    amzn) HASHICORP_REPO_FAMILY="AmazonLinux" ;;
    *) HASHICORP_REPO_FAMILY="RHEL" ;;
  esac
  HASHICORP_REPO_URL="https://rpm.releases.hashicorp.com/${HASHICORP_REPO_FAMILY}/hashicorp.repo"
  if [ "$PKG_MGR" = "dnf" ]; then
    # "dnf config-manager" is provided by dnf-plugins-core.
    # shellcheck disable=SC2086
    $PKG_INSTALL dnf-plugins-core curl wget
    dnf config-manager --add-repo "$HASHICORP_REPO_URL"
  else
    # "yum-config-manager" is provided by yum-utils, not dnf-plugins-core.
    # shellcheck disable=SC2086
    $PKG_INSTALL yum-utils curl wget
    yum-config-manager --add-repo "$HASHICORP_REPO_URL"
  fi
fi
# Step 2: Install Consul
echo -e "${GREEN}[2/10] Installing Consul...${NC}"
# shellcheck disable=SC2086
$PKG_INSTALL consul
# Step 3: Create Consul user and directories
echo -e "${GREEN}[3/10] Creating Consul user and directories...${NC}"
# Dedicated system account without a login shell; skipped when it already exists.
id consul &>/dev/null || useradd --system --home /etc/consul --shell /bin/false consul
# Scratch, configuration and data directories, all owned by the service account.
consul_dirs=(/opt/consul /etc/consul.d /var/lib/consul)
mkdir -p "${consul_dirs[@]}"
chown -R consul:consul "${consul_dirs[@]}"
chmod 755 "${consul_dirs[@]}"
# Step 4: Generate encryption key
#######################################
# Set permissions on the PEM files of a directory: certificates are readable
# by everyone, private keys only by their owner.
# Arguments: $1 - directory containing the *.pem files
#######################################
secure_pem_permissions() {
  local cert_dir=$1
  # Order matters: "*.pem" also matches the "*-key.pem" files, so the broad
  # mode is applied first and the private keys are tightened afterwards.
  chmod 644 "$cert_dir"/*.pem
  chmod 600 "$cert_dir"/*-key.pem
}
echo -e "${GREEN}[4/10] Generating encryption key...${NC}"
# All federated servers must share one gossip key. Export CONSUL_GOSSIP_KEY to
# reuse the key printed by the first node; a new key is generated otherwise.
GOSSIP_KEY="${CONSUL_GOSSIP_KEY:-}"
if [ -z "$GOSSIP_KEY" ]; then
  GOSSIP_KEY=$(consul keygen)
fi
echo -e "${YELLOW}Save this gossip encryption key: $GOSSIP_KEY${NC}"
# Step 5: Generate TLS certificates
echo -e "${GREEN}[5/10] Generating TLS certificates...${NC}"
# The consul tls commands write their output into the current directory.
cd /etc/consul.d
# Servers only trust each other when their certificates come from the same CA:
# reuse a CA copied here from the first node and create one only when absent.
if [ ! -f consul-agent-ca.pem ] || [ ! -f consul-agent-ca-key.pem ]; then
  consul tls ca create
fi
consul tls cert create -server -dc "$DATACENTER"
chown consul:consul /etc/consul.d/*.pem
secure_pem_permissions /etc/consul.d
# Step 6: Generate bootstrap token
echo -e "${GREEN}[6/10] Generating bootstrap token...${NC}"
# Format the 128 random bits as a UUID (8-4-4-4-12), the shape Consul uses for
# ACL token secrets.
token_hex=$(openssl rand -hex 16)
BOOTSTRAP_TOKEN="${token_hex:0:8}-${token_hex:8:4}-${token_hex:12:4}-${token_hex:16:4}-${token_hex:20:12}"
# Only a primary datacenter installs this token, so only show it there.
if [ -z "$PRIMARY_DC_IP" ]; then
  echo -e "${YELLOW}Save this bootstrap token: $BOOTSTRAP_TOKEN${NC}"
fi
# Step 7: Create Consul configuration
echo -e "${GREEN}[7/10] Creating Consul configuration...${NC}"
cat > /etc/consul.d/consul.hcl << EOF
datacenter = "$DATACENTER"
data_dir = "/var/lib/consul"
log_level = "INFO"
node_name = "$NODE_NAME"
bind_addr = "$NODE_IP"
client_addr = "0.0.0.0"
server = true
bootstrap_expect = 1
ui_config {
enabled = true
}
connect {
enabled = true
}
encrypt = "$GOSSIP_KEY"
tls {
defaults {
ca_file = "/etc/consul.d/consul-agent-ca.pem"
cert_file = "/etc/consul.d/$DATACENTER-server-consul-0.pem"
key_file = "/etc/consul.d/$DATACENTER-server-consul-0-key.pem"
verify_incoming = true
verify_outgoing = true
}
internal_rpc {
verify_server_hostname = true
}
}
acl = {
enabled = true
default_policy = "deny"
enable_token_persistence = true
EOF
if [ -n "$PRIMARY_DC_IP" ]; then
# Secondary datacenter configuration
cat >> /etc/consul.d/consul.hcl << EOF
enable_token_replication = true
}
primary_datacenter = "dc1"
retry_join_wan = ["$PRIMARY_DC_IP"]
EOF
else
# Primary datacenter configuration
cat >> /etc/consul.d/consul.hcl << EOF
tokens = {
initial_management = "$BOOTSTRAP_TOKEN"
}
}
EOF
fi
cat >> /etc/consul.d/consul.hcl << EOF
ports {
grpc = 8502
grpc_tls = 8503
}
performance {
raft_multiplier = 1
}
EOF
chown consul:consul /etc/consul.d/consul.hcl
chmod 640 /etc/consul.d/consul.hcl
# Step 8: Configure firewall
echo -e "${GREEN}[8/10] Configuring firewall...${NC}"
CONSUL_PORTS=(8300 8301 8302 8500 8501 8502 8503 8600)
# Rules are added only while the firewall service is active; individual
# failures are ignored on purpose (best effort).
case "$FIREWALL_CMD" in
  ufw)
    if systemctl is-active --quiet ufw; then
      for consul_port in "${CONSUL_PORTS[@]}"; do
        ufw allow "$consul_port" >/dev/null 2>&1 || true
      done
    fi
    ;;
  *)
    if systemctl is-active --quiet firewalld; then
      for consul_port in "${CONSUL_PORTS[@]}"; do
        # Each port is opened for TCP first, then UDP.
        for proto in tcp udp; do
          firewall-cmd --permanent --add-port="$consul_port"/"$proto" >/dev/null 2>&1 || true
        done
      done
      firewall-cmd --reload >/dev/null 2>&1 || true
    fi
    ;;
esac
# Step 9: Start and enable Consul
echo -e "${GREEN}[9/10] Starting Consul service...${NC}"
systemctl daemon-reload
systemctl enable consul
systemctl start consul
# Step 10: Verify installation
echo -e "${GREEN}[10/10] Verifying installation...${NC}"
# Give the agent time to settle before checking its state.
sleep 10
if systemctl is-active --quiet consul; then
  echo -e "${GREEN}✓ Consul service is running${NC}"
else
  echo -e "${RED}✗ Consul service is not running${NC}"
  exit 1
fi
# A failing query is only a warning: the cluster may still be initializing.
if consul members >/dev/null 2>&1; then
  echo -e "${GREEN}✓ Consul cluster is accessible${NC}"
else
  echo -e "${YELLOW}⚠ Consul cluster may still be initializing${NC}"
fi
echo -e "${GREEN}Consul installation completed successfully!${NC}"
echo ""
echo -e "${YELLOW}Important information:${NC}"
echo "Gossip encryption key: $GOSSIP_KEY"
# The bootstrap token is written into the configuration of a primary
# datacenter only; on a secondary it is not installed anywhere.
if [ -z "$PRIMARY_DC_IP" ]; then
  echo "Bootstrap token: $BOOTSTRAP_TOKEN"
fi
# The generated configuration sets no HTTPS port, so the UI is served by the
# plain HTTP listener.
echo "Web UI: http://$NODE_IP:8500/ui"
echo ""
echo -e "${YELLOW}Next steps:${NC}"
if [ -z "$PRIMARY_DC_IP" ]; then
  echo "1. Configure additional nodes in this datacenter"
  echo "2. Set up secondary datacenters using this node as primary"
else
  echo "1. Configure ACL replication token on primary datacenter"
  echo "2. Join this datacenter to the WAN federation"
fi
echo "3. Configure service mesh and policies as needed"
Review the script before running. Execute it as root with the required arguments: sudo bash install.sh <datacenter_name> <node_ip> <node_name> [primary_dc_ip]