Configure Consul WAN federation across multiple datacenters with secure ACL token replication. Enable automatic token synchronization, gossip encryption, and cross-datacenter service discovery for distributed infrastructure.
Prerequisites
- At least 6 servers (3 per datacenter)
- Network connectivity between datacenters on ports 8300-8302
- Root or sudo access on all servers
- Basic understanding of Consul architecture
What this solves
Consul multi-datacenter replication allows you to federate multiple Consul clusters across different geographic locations or network segments. This setup enables global service discovery, configuration sharing, and ACL token synchronization between datacenters while maintaining network segmentation and local performance.
Step-by-step configuration
Install Consul on all nodes
Install Consul on servers in both datacenters using the HashiCorp repository.
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
sudo apt update
sudo apt install -y consul
Generate encryption keys
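Create the gossip encryption key that will be shared by every server in all datacenters. Use a single `consul keygen` output as the `encrypt` value on every node. The ACL bootstrap token is not generated with `consul keygen`; it is issued later by `consul acl bootstrap` in the primary datacenter.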
consul keygen
consul keygen
Configure primary datacenter (dc1)
Set up the primary datacenter configuration with ACL system enabled.
datacenter = "dc1"
data_dir = "/opt/consul"
log_level = "INFO"
server = true
bootstrap_expect = 3
bind_addr = "203.0.113.10"
client_addr = "0.0.0.0"
retry_join = ["203.0.113.11", "203.0.113.12"]
ui_config {
enabled = true
}
connect {
enabled = true
}
encrypt = "your-gossip-encryption-key-here"
acl = {
enabled = true
default_policy = "deny"
enable_token_persistence = true
enable_token_replication = true
}
ports {
grpc = 8502
}
Configure secondary datacenter (dc2)
Set up the secondary datacenter to replicate from the primary datacenter.
datacenter = "dc2"
data_dir = "/opt/consul"
log_level = "INFO"
server = true
bootstrap_expect = 3
bind_addr = "198.51.100.10"
client_addr = "0.0.0.0"
retry_join = ["198.51.100.11", "198.51.100.12"]
retry_join_wan = ["203.0.113.10", "203.0.113.11", "203.0.113.12"]
ui_config {
enabled = true
}
connect {
enabled = true
}
encrypt = "your-gossip-encryption-key-here"
acl = {
enabled = true
default_policy = "deny"
enable_token_persistence = true
enable_token_replication = true
tokens {
replication = "your-replication-token-here"
}
}
ports {
grpc = 8502
}
primary_datacenter = "dc1"
Create systemd service
Configure Consul to run as a system service with proper user permissions.
# The HashiCorp package may already have created the consul user; only add it if missing.
id consul &>/dev/null || sudo useradd --system --home /etc/consul.d --shell /bin/false consul
# Make sure both the config and data directories exist before changing ownership.
sudo mkdir -p /etc/consul.d /opt/consul
sudo chown -R consul:consul /opt/consul /etc/consul.d
# The config holds the gossip key and ACL tokens: keep it unreadable for other users.
sudo chmod 640 /etc/consul.d/consul.hcl
[Unit]
Description=Consul
Documentation=https://www.consul.io/
Requires=network-online.target
After=network-online.target
ConditionFileNotEmpty=/etc/consul.d/consul.hcl
[Service]
Type=notify
User=consul
Group=consul
ExecStart=/usr/bin/consul agent -config-dir=/etc/consul.d/
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
Start Consul clusters
Enable and start Consul on all servers in the primary datacenter first.
sudo systemctl daemon-reload
sudo systemctl enable consul
sudo systemctl start consul
sudo systemctl status consul
Bootstrap ACL system in primary datacenter
Initialize the ACL system and create the bootstrap token on the leader server in dc1.
consul acl bootstrap
Create replication token
Create a dedicated token for ACL replication with the necessary permissions.
export CONSUL_HTTP_TOKEN="your-bootstrap-token-here"
consul acl policy create \
-name "replication-policy" \
-description "ACL replication policy" \
-rules @- << 'EOF'
acl = "write"
operator = "write"
service_prefix "" {
policy = "read"
intentions = "read"
}
EOF
consul acl token create \
-description "ACL replication token" \
-policy-name "replication-policy"
Update secondary datacenter configuration
Add the replication token to the secondary datacenter configuration.
datacenter = "dc2"
data_dir = "/opt/consul"
log_level = "INFO"
server = true
bootstrap_expect = 3
bind_addr = "198.51.100.10"
client_addr = "0.0.0.0"
retry_join = ["198.51.100.11", "198.51.100.12"]
retry_join_wan = ["203.0.113.10", "203.0.113.11", "203.0.113.12"]
ui_config {
enabled = true
}
connect {
enabled = true
}
encrypt = "your-gossip-encryption-key-here"
acl = {
enabled = true
default_policy = "deny"
enable_token_persistence = true
enable_token_replication = true
tokens {
replication = "your-actual-replication-token-secretid"
}
}
ports {
grpc = 8502
}
primary_datacenter = "dc1"
Start secondary datacenter
Start all servers in the secondary datacenter to join the WAN federation.
# Pick up the unit file, enable the service at boot, then (re)start it so the
# updated configuration and replication token are read. `restart` also starts
# a unit that is not running yet, whereas `reload` fails on an inactive unit.
sudo systemctl daemon-reload
sudo systemctl enable consul
sudo systemctl restart consul
sudo systemctl status consul
Configure firewall rules
Open the necessary ports for Consul federation between datacenters.
sudo ufw allow 8300/tcp comment "Consul server RPC"
sudo ufw allow 8301/tcp comment "Consul LAN gossip"
sudo ufw allow 8301/udp comment "Consul LAN gossip"
sudo ufw allow 8302/tcp comment "Consul WAN gossip"
sudo ufw allow 8302/udp comment "Consul WAN gossip"
sudo ufw allow 8500/tcp comment "Consul HTTP API"
sudo ufw allow 8502/tcp comment "Consul gRPC API"
sudo ufw reload
Create service tokens for applications
Create tokens for services that need to register across datacenters.
consul acl policy create \
-name "service-policy" \
-description "Policy for service registration" \
-rules @- << 'EOF'
service_prefix "" {
policy = "write"
}
node_prefix "" {
policy = "read"
}
EOF
consul acl token create \
-description "Service registration token" \
-policy-name "service-policy"
Verify multi-datacenter setup
export CONSUL_HTTP_TOKEN="your-bootstrap-token-here"
consul members -wan
consul catalog datacenters
consul acl token list
Test cross-datacenter service discovery:
consul catalog services -datacenter=dc1
consul catalog services -datacenter=dc2
consul catalog nodes -datacenter=dc1
consul catalog nodes -datacenter=dc2
Configure ACL token replication monitoring
Monitor the replication status and configure alerts for replication failures.
# ACL replication status is exposed through the HTTP API (run this against a
# server in the secondary datacenter); the consul CLI has no
# `acl replication-status` subcommand.
curl -sS -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" "http://localhost:8500/v1/acl/replication?pretty"
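Replication progress (`ReplicatedIndex`, `LastSuccess`, `LastError`) is reported by the `/v1/acl/replication` endpoint; the commands below check Raft peer health and agent runtime statistics: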
consul operator raft list-peers
consul info | grep replication
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Replication not working | Wrong replication token | Verify token has acl:write and operator:write permissions |
| WAN federation fails | Firewall blocking port 8302 | Open UDP/TCP 8302 between datacenters |
| Services not visible across DCs | ACL policy too restrictive | Grant service:read permissions to query token |
| Bootstrap token not working | ACL system not properly initialized | Run consul acl bootstrap on cluster leader |
| Secondary DC won't join | Gossip key mismatch | Ensure same encrypt key on all nodes |
| Replication lag increasing | Network issues or high load | Check network connectivity and server resources |
Monitor and troubleshoot federation
Set up monitoring for federation health and ACL replication status. You can integrate this with Consul monitoring using Prometheus and Grafana for comprehensive observability.
consul monitor -log-level=DEBUG | grep -i replication
consul debug -duration=30s -output=/tmp/consul-debug
Monitor federation metrics:
curl http://localhost:8500/v1/status/leader
curl http://localhost:8500/v1/catalog/datacenters
curl -H "X-Consul-Token: your-token" http://localhost:8500/v1/acl/replication
Backup and disaster recovery
Implement automated backup strategies for your federated Consul clusters. Consider setting up Consul backup and disaster recovery procedures to protect your federation configuration.
# Keep the file name in a variable so that exactly the snapshot just written is
# inspected; a `backup-*.snap` glob expands to several arguments once more than
# one backup exists.
snapshot_file="backup-$(date +%Y%m%d-%H%M%S).snap"
consul snapshot save "$snapshot_file"
consul snapshot inspect "$snapshot_file"
Next steps
- Configure Consul Connect with Kubernetes integration for service mesh across datacenters
- Implement Consul Connect mTLS with Vault PKI backend for enhanced security
- Configure automated Consul backup and disaster recovery for production resilience
- Configure Consul multi-datacenter service mesh with Envoy proxy
- Monitor Consul federation health with Prometheus and alerting
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
set -euo pipefail

# Consul multi-datacenter installer with ACL token replication.
#
# Usage:
#   ./install_consul_multidc.sh <datacenter> <bind_ip> <primary_dc_ips>
# Examples:
#   ./install_consul_multidc.sh dc1 203.0.113.10 203.0.113.11,203.0.113.12
#   ./install_consul_multidc.sh dc2 198.51.100.10 203.0.113.10,203.0.113.11,203.0.113.12

# ANSI colour sequences, stored as literal backslash escapes and rendered by
# the `echo -e` calls throughout the script.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Script-wide state; initialised empty here and filled in later by the script.
DATACENTER="" BIND_IP="" PRIMARY_DC_IPS=""
GOSSIP_KEY="" REPLICATION_TOKEN=""
#######################################
# Rollback handler: announces the failure, stops and disables the consul
# unit, then removes the unit file, the config/data directories and the
# consul user and group. Failures of the systemctl/userdel/groupdel steps
# are deliberately ignored so the rollback always runs to the end.
# Globals:   RED, NC (read)
# Arguments: none
#######################################
cleanup() {
  local verb tool

  echo -e "${RED}[ERROR] Installation failed. Rolling back...${NC}"

  for verb in stop disable; do
    systemctl "$verb" consul 2>/dev/null || true
  done

  rm -f /etc/systemd/system/consul.service
  rm -rf /etc/consul.d /opt/consul

  for tool in userdel groupdel; do
    "$tool" consul 2>/dev/null || true
  done
}

# Run the rollback whenever a command fails.
trap cleanup ERR
#######################################
# Print the command-line synopsis with two examples to stdout and terminate
# the script with exit status 1.
# Arguments: none ($0 is used as the program name)
#######################################
usage() {
  cat << USAGE_TEXT
Usage: $0 <datacenter> <bind_ip> <primary_dc_ips>
 datacenter: dc1 (primary) or dc2 (secondary)
 bind_ip: IP address to bind Consul to
 primary_dc_ips: Comma-separated list of primary DC server IPs

Example: $0 dc1 203.0.113.10 203.0.113.11,203.0.113.12
Example: $0 dc2 198.51.100.10 203.0.113.10,203.0.113.11,203.0.113.12
USAGE_TEXT
  exit 1
}
# Three positional arguments are mandatory; anything less prints the usage text.
(( $# >= 3 )) || usage

DATACENTER="$1"
BIND_IP="$2"
PRIMARY_DC_IPS="$3"

# Only the two datacenter names this script knows how to configure are accepted.
case "$DATACENTER" in
  dc1|dc2)
    ;;
  *)
    echo -e "${RED}Error: datacenter must be 'dc1' or 'dc2'${NC}"
    exit 1
    ;;
esac

# Everything below installs packages and writes under /etc, so root is required.
if (( EUID != 0 )); then
  echo -e "${RED}This script must be run as root${NC}"
  exit 1
fi
# Detect distribution: read ID from /etc/os-release, choose the package manager
# for it, then derive the matching update/install command strings.
if [[ ! -f /etc/os-release ]]; then
  echo -e "${RED}Cannot detect distribution${NC}"
  exit 1
fi

. /etc/os-release

case "$ID" in
  ubuntu|debian)
    PKG_MGR="apt"
    ;;
  almalinux|rocky|centos|rhel|ol|fedora)
    # Prefer dnf where it is available, otherwise fall back to yum.
    if command -v dnf &> /dev/null; then
      PKG_MGR="dnf"
    else
      PKG_MGR="yum"
    fi
    ;;
  amzn)
    PKG_MGR="yum"
    ;;
  *)
    echo -e "${RED}Unsupported distribution: $ID${NC}"
    exit 1
    ;;
esac

case "$PKG_MGR" in
  apt)
    PKG_UPDATE="apt update"
    PKG_INSTALL="apt install -y"
    ;;
  dnf)
    PKG_UPDATE="dnf makecache"
    PKG_INSTALL="dnf install -y"
    ;;
  yum)
    PKG_UPDATE="yum makecache fast"
    PKG_INSTALL="yum install -y"
    ;;
esac
# Step 1: register the HashiCorp package repository and install consul.
# PKG_UPDATE / PKG_INSTALL hold multi-word commands and are expanded unquoted
# on purpose so that they split into command + arguments.
echo -e "${GREEN}[1/9] Installing Consul...${NC}"
if [[ "$PKG_MGR" == "apt" ]]; then
  HASHICORP_KEYRING=/usr/share/keyrings/hashicorp-archive-keyring.gpg
  $PKG_UPDATE
  $PKG_INSTALL curl gnupg lsb-release
  # --batch --yes: overwrite an existing keyring instead of prompting, so the
  # script can be re-run on a host where the key was already installed.
  curl -fsSL https://apt.releases.hashicorp.com/gpg \
    | gpg --batch --yes --dearmor -o "$HASHICORP_KEYRING"
  echo "deb [signed-by=${HASHICORP_KEYRING}] https://apt.releases.hashicorp.com $(lsb_release -cs) main" \
    > /etc/apt/sources.list.d/hashicorp.list
  $PKG_UPDATE
  $PKG_INSTALL consul
else
  # HashiCorp publishes a separate RPM repository per distribution family;
  # the RHEL repo definition does not resolve on Fedora or Amazon Linux.
  case "${ID:-}" in
    fedora) HASHICORP_RPM_REPO="fedora" ;;
    amzn)   HASHICORP_RPM_REPO="AmazonLinux" ;;
    *)      HASHICORP_RPM_REPO="RHEL" ;;
  esac
  HASHICORP_RPM_REPO_URL="https://rpm.releases.hashicorp.com/${HASHICORP_RPM_REPO}/hashicorp.repo"

  # config-manager lives in dnf-plugins-core (dnf) or yum-utils (yum).
  $PKG_INSTALL dnf-plugins-core 2>/dev/null || $PKG_INSTALL yum-utils
  if [[ "$PKG_MGR" == "dnf" ]]; then
    dnf config-manager --add-repo "$HASHICORP_RPM_REPO_URL"
  else
    yum-config-manager --add-repo "$HASHICORP_RPM_REPO_URL"
  fi
  $PKG_INSTALL consul
fi
# Step 2: make sure the consul system account exists, then create the config
# and data directories, hand them to that account and set their mode.
echo -e "${GREEN}[2/9] Creating consul user and directories...${NC}"
id consul &>/dev/null \
  || useradd --system --home /etc/consul.d --shell /bin/false consul

CONSUL_CONFIG_DIR=/etc/consul.d
CONSUL_DATA_DIR=/opt/consul
mkdir -p "$CONSUL_CONFIG_DIR" "$CONSUL_DATA_DIR"
chown -R consul:consul "$CONSUL_DATA_DIR" "$CONSUL_CONFIG_DIR"
chmod 755 "$CONSUL_CONFIG_DIR" "$CONSUL_DATA_DIR"
# Step 3: take the gossip key from CONSUL_GOSSIP_KEY when provided, otherwise
# generate one and print it so the operator can reuse it on the other servers.
echo -e "${GREEN}[3/9] Generating encryption keys...${NC}"
GOSSIP_KEY="${CONSUL_GOSSIP_KEY:-}"
if [[ -z "$GOSSIP_KEY" ]]; then
  GOSSIP_KEY=$(consul keygen)
  echo -e "${YELLOW}Generated gossip key: $GOSSIP_KEY${NC}"
  echo -e "${YELLOW}Save this key - all servers need the same key!${NC}"
fi

# The secondary datacenter cannot be configured without a replication token
# supplied through CONSUL_REPLICATION_TOKEN.
if [[ "$DATACENTER" == "dc2" ]]; then
  if [[ -z "${CONSUL_REPLICATION_TOKEN:-}" ]]; then
    echo -e "${YELLOW}For secondary datacenter, you need to provide CONSUL_REPLICATION_TOKEN${NC}"
    echo -e "${YELLOW}Export it as environment variable before running this script${NC}"
    exit 1
  fi
  REPLICATION_TOKEN="$CONSUL_REPLICATION_TOKEN"
fi
# Step 4: write /etc/consul.d/consul.hcl.
echo -e "${GREEN}[4/9] Creating Consul configuration...${NC}"

#######################################
# Render a comma-separated IP list as the body of an HCL string list,
# e.g. '10.0.0.1, 10.0.0.2' -> '"10.0.0.1", "10.0.0.2"'.
# Whitespace around entries is stripped and empty entries are dropped.
# Arguments: $1 - comma-separated list (may be empty)
# Outputs:   the rendered list body on stdout (no trailing newline)
#######################################
hcl_ip_list() {
  local csv=$1 entry rendered=""
  local -a entries=()
  [[ -n "$csv" ]] || return 0
  IFS=',' read -ra entries <<< "$csv"
  # The ${arr[@]+...} form keeps `set -u` happy with an empty array on old bash.
  for entry in ${entries[@]+"${entries[@]}"}; do
    entry="${entry//[[:space:]]/}"
    [[ -n "$entry" ]] || continue
    rendered+="\"$entry\", "
  done
  printf '%s' "${rendered%, }"
}

# WAN federation always targets the primary datacenter servers.
WAN_JOIN_LIST=$(hcl_ip_list "$PRIMARY_DC_IPS")

# LAN join (retry_join) must list servers of the LOCAL datacenter: Consul's LAN
# gossip pool is per datacenter. For dc1 the primary IPs are the local peers.
# For dc2 the local peers come from an optional 4th argument or from
# CONSUL_LAN_JOIN_IPS; without either the legacy behaviour is kept, with a warning.
LAN_JOIN_IPS="$PRIMARY_DC_IPS"
if [[ "$DATACENTER" == "dc2" ]]; then
  LAN_JOIN_IPS="${4:-${CONSUL_LAN_JOIN_IPS:-}}"
  if [[ -z "$LAN_JOIN_IPS" ]]; then
    echo -e "${YELLOW}Warning: no local dc2 server IPs given (4th argument or CONSUL_LAN_JOIN_IPS);${NC}" >&2
    echo -e "${YELLOW}retry_join falls back to the primary DC IPs and dc2 servers will not discover each other.${NC}" >&2
    LAN_JOIN_IPS="$PRIMARY_DC_IPS"
  fi
fi
RETRY_JOIN_LIST=$(hcl_ip_list "$LAN_JOIN_IPS")

CONSUL_CONFIG_FILE=/etc/consul.d/consul.hcl

# Create (or replace) the file with owner-only permissions before any secret is
# written, so the gossip key and token are never world-readable.
install -m 0600 /dev/null "$CONSUL_CONFIG_FILE"

cat > "$CONSUL_CONFIG_FILE" << EOF
datacenter = "$DATACENTER"
data_dir = "/opt/consul"
log_level = "INFO"
server = true
bootstrap_expect = 3
bind_addr = "$BIND_IP"
client_addr = "0.0.0.0"
retry_join = [$RETRY_JOIN_LIST]
ui_config {
  enabled = true
}
connect {
  enabled = true
}
encrypt = "$GOSSIP_KEY"
acl = {
  enabled = true
  default_policy = "deny"
  enable_token_persistence = true
  enable_token_replication = true
EOF

# Only the secondary datacenter carries a replication token.
if [[ "$DATACENTER" == "dc2" ]]; then
  cat >> "$CONSUL_CONFIG_FILE" << EOF
  tokens {
    replication = "$REPLICATION_TOKEN"
  }
EOF
fi

# Close the acl block opened above and add the gRPC port.
cat >> "$CONSUL_CONFIG_FILE" << EOF
}
ports {
  grpc = 8502
}
EOF

# Federation settings for the secondary datacenter.
if [[ "$DATACENTER" == "dc2" ]]; then
  echo 'primary_datacenter = "dc1"' >> "$CONSUL_CONFIG_FILE"
  echo "retry_join_wan = [$WAN_JOIN_LIST]" >> "$CONSUL_CONFIG_FILE"
fi

chown consul:consul "$CONSUL_CONFIG_FILE"
chmod 640 "$CONSUL_CONFIG_FILE"
# Step 5: install the systemd unit for the consul agent. The lines are
# single-quoted so that $MAINPID reaches the unit file literally.
echo -e "${GREEN}[5/9] Creating systemd service...${NC}"
CONSUL_UNIT_LINES=(
  '[Unit]'
  'Description=Consul'
  'Documentation=https://www.consul.io/'
  'Requires=network-online.target'
  'After=network-online.target'
  'ConditionFileNotEmpty=/etc/consul.d/consul.hcl'
  '[Service]'
  'Type=notify'
  'User=consul'
  'Group=consul'
  'ExecStart=/usr/bin/consul agent -config-dir=/etc/consul.d/'
  'ExecReload=/bin/kill -HUP $MAINPID'
  'KillMode=process'
  'Restart=on-failure'
  'LimitNOFILE=65536'
  '[Install]'
  'WantedBy=multi-user.target'
)
printf '%s\n' "${CONSUL_UNIT_LINES[@]}" > /etc/systemd/system/consul.service
# Step 6: open the Consul ports (8300-8302 server RPC and gossip, 8500 HTTP,
# 8502 gRPC, 8600 DNS) in whichever supported firewall is usable.
echo -e "${GREEN}[6/9] Configuring firewall...${NC}"
CONSUL_FIREWALL_PORTS=(
  8300-8302/tcp
  8300-8302/udp
  8500/tcp
  8502/tcp
  8600/tcp
  8600/udp
)

# firewall-cmd can be installed while firewalld is stopped; in that state every
# call fails and, under `set -e` with the ERR trap, would roll back the whole
# install. Only use it when the daemon reports that it is running.
if command -v firewall-cmd &> /dev/null && firewall-cmd --state &> /dev/null; then
  for port_spec in "${CONSUL_FIREWALL_PORTS[@]}"; do
    firewall-cmd --permanent --add-port="$port_spec"
  done
  firewall-cmd --reload
elif command -v ufw &> /dev/null; then
  for port_spec in "${CONSUL_FIREWALL_PORTS[@]}"; do
    # ufw writes port ranges as first:last instead of first-last.
    ufw allow "${port_spec/-/:}"
  done
else
  echo -e "${YELLOW}No running firewalld and no ufw found; skipping firewall configuration${NC}"
fi
# Step 7: register the unit with systemd, enable it at boot and start it.
echo -e "${GREEN}[7/9] Starting Consul service...${NC}"
systemctl daemon-reload
systemctl enable consul
systemctl start consul

# Step 8: fixed grace period before the checks below.
echo -e "${GREEN}[8/9] Waiting for Consul to start...${NC}"
sleep 10

# Step 9: the unit must be active and the agent must answer `consul members`.
echo -e "${GREEN}[9/9] Verifying installation...${NC}"
if ! systemctl is-active --quiet consul; then
  echo -e "${RED}✗ Consul service is not running${NC}"
  # NOTE(review): if `systemctl status` exits non-zero here, `set -e` and the
  # ERR trap presumably fire before the explicit exit below - confirm intent.
  systemctl status consul
  exit 1
fi
echo -e "${GREEN}✓ Consul service is running${NC}"

if ! consul members &>/dev/null; then
  echo -e "${RED}✗ Consul is not responding${NC}"
  exit 1
fi
echo -e "${GREEN}✓ Consul is responding to API calls${NC}"
consul members
# Final summary: success banner followed by datacenter-specific follow-up steps.
echo -e "${GREEN}Installation completed successfully!${NC}"
echo -e "${YELLOW}Next steps:${NC}"
case "$DATACENTER" in
  dc1)
    printf '%s\n' \
      "1. Wait for all dc1 servers to join the cluster" \
      "2. Bootstrap ACL system: consul acl bootstrap" \
      "3. Create replication token for dc2" \
      "4. Web UI: http://$BIND_IP:8500"
    ;;
  *)
    printf '%s\n' \
      "1. Verify dc2 servers have joined: consul members" \
      "2. Check WAN federation: consul members -wan" \
      "3. Web UI: http://$BIND_IP:8500"
    ;;
esac
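Review the script before running. Save it as install_consul_multidc.sh and run it as root with the required arguments, for example: bash install_consul_multidc.sh dc1 203.0.113.10 203.0.113.11,203.0.113.12. Set CONSUL_GOSSIP_KEY to reuse the same gossip key on every server, and set CONSUL_REPLICATION_TOKEN when installing a dc2 server.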