Deploy a production-ready HashiCorp Nomad cluster with server and client nodes, TLS encryption, and Consul integration for service discovery. Includes workload deployment and monitoring configuration.
Prerequisites
- Root or sudo access
- 3+ servers for production cluster
- Docker installed on client nodes
- Basic understanding of containers and networking
What this solves
HashiCorp Nomad provides container orchestration and workload scheduling across multiple nodes. This tutorial sets up a production-ready Nomad cluster with TLS encryption for secure communication and Consul integration for service discovery and cluster coordination.
Step-by-step installation
Update system packages
Start by updating your package manager to ensure you get the latest versions.
sudo apt update && sudo apt upgrade -y
sudo apt install -y curl unzip
Install HashiCorp GPG key and repository
Add the official HashiCorp repository to get the latest stable versions of Nomad and Consul.
wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
sudo apt update
Install Nomad and Consul
Install both Nomad for container orchestration and Consul for service discovery.
sudo apt install -y nomad consul
Create Nomad and Consul directories
Set up the directory structure for configuration files, data, and certificates.
sudo mkdir -p /etc/nomad.d /opt/nomad/data /etc/consul.d /opt/consul/data
sudo mkdir -p /opt/nomad/tls /opt/consul/tls
sudo chown -R nomad:nomad /etc/nomad.d /opt/nomad
sudo chown -R consul:consul /etc/consul.d /opt/consul
Generate TLS certificates for Nomad
Create a Certificate Authority and generate certificates for secure cluster communication.
cd /opt/nomad/tls
sudo nomad tls ca create
sudo nomad tls cert create -server -region global -dc dc1
sudo nomad tls cert create -client -region global -dc dc1
sudo chown -R nomad:nomad /opt/nomad/tls
sudo chmod 600 /opt/nomad/tls/*.pem
Generate Consul encryption key
Create a gossip encryption key for secure Consul cluster communication.
CONSUL_ENCRYPT_KEY=$(consul keygen)
echo "Consul encryption key: $CONSUL_ENCRYPT_KEY"
Configure Consul cluster
Configure Consul server (on server nodes)
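Create the Consul server configuration at /etc/consul.d/consul.hcl (the file the Consul systemd unit below checks for) for cluster coordination and service discovery. Replace the retry_join addresses with your own server IPs and the encrypt value with the gossip key generated in the previous step.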
# Consul server agent configuration (/etc/consul.d/consul.hcl on server nodes).
datacenter       = "dc1"
data_dir         = "/opt/consul/data"
log_level        = "INFO"
server           = true
bootstrap_expect = 3

ui_config {
  enabled = true
}

bind_addr   = "{{ GetInterfaceIP \"eth0\" }}"
client_addr = "0.0.0.0"
retry_join  = ["203.0.113.10", "203.0.113.11", "203.0.113.12"]

# Gossip encryption key: paste the output of `consul keygen`; it must be the
# same on every agent in the cluster.
encrypt = "REPLACE_WITH_CONSUL_ENCRYPT_KEY"

# TLS material. These are the file names written by `consul tls ca create`
# and `consul tls cert create -server -dc dc1` when run in /opt/consul/tls.
ca_file   = "/opt/consul/tls/consul-agent-ca.pem"
cert_file = "/opt/consul/tls/dc1-server-consul-0.pem"
key_file  = "/opt/consul/tls/dc1-server-consul-0-key.pem"

verify_incoming        = true
verify_outgoing        = true
verify_server_hostname = true

connect {
  enabled = true
}
Configure Consul client (on client nodes)
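Create the Consul client configuration at /etc/consul.d/consul.hcl on each node that will run a Nomad client. Use the same retry_join addresses and the same encrypt key as on the servers.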
# Consul client agent configuration (/etc/consul.d/consul.hcl on client nodes).
datacenter = "dc1"
data_dir   = "/opt/consul/data"
log_level  = "INFO"
server     = false

bind_addr  = "{{ GetInterfaceIP \"eth0\" }}"
retry_join = ["203.0.113.10", "203.0.113.11", "203.0.113.12"]

# Gossip encryption key: must be identical to the key used by the servers.
encrypt = "REPLACE_WITH_CONSUL_ENCRYPT_KEY"

# TLS material. These are the file names written by `consul tls ca create`
# and `consul tls cert create -client -dc dc1` when run in /opt/consul/tls.
ca_file   = "/opt/consul/tls/consul-agent-ca.pem"
cert_file = "/opt/consul/tls/dc1-client-consul-0.pem"
key_file  = "/opt/consul/tls/dc1-client-consul-0-key.pem"

verify_incoming = true
verify_outgoing = true

connect {
  enabled = true
}
Generate Consul TLS certificates
Create TLS certificates for secure Consul communication.
cd /opt/consul/tls
sudo consul tls ca create
sudo consul tls cert create -server -dc dc1
sudo consul tls cert create -client -dc dc1
sudo chown -R consul:consul /opt/consul/tls
sudo chmod 600 /opt/consul/tls/*.pem
Configure Nomad cluster
Configure Nomad server (on server nodes)
Create the Nomad server configuration at /etc/nomad.d/nomad.hcl (the file the Nomad systemd unit below loads) with TLS encryption and Consul integration.
# Nomad server agent configuration (/etc/nomad.d/nomad.hcl on server nodes).
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
log_level  = "INFO"
bind_addr  = "0.0.0.0"

server {
  enabled          = true
  bootstrap_expect = 3

  server_join {
    retry_join = ["203.0.113.10:4648", "203.0.113.11:4648", "203.0.113.12:4648"]
  }
}

client {
  enabled = false
}

# Integration with the local Consul agent. The certificate paths use the file
# names that `consul tls ca create` and `consul tls cert create -client -dc dc1`
# write into /opt/consul/tls.
# NOTE(review): ssl = true requires the Consul agent to serve HTTPS on this
# address, and the Consul configuration in this guide sets no `ports` block -
# confirm the listener before relying on it.
# NOTE(review): the Consul TLS step makes these files mode 600 owned by
# consul, while the Nomad unit runs as user nomad - confirm read access.
consul {
  address             = "127.0.0.1:8500"
  ssl                 = true
  ca_file             = "/opt/consul/tls/consul-agent-ca.pem"
  cert_file           = "/opt/consul/tls/dc1-client-consul-0.pem"
  key_file            = "/opt/consul/tls/dc1-client-consul-0-key.pem"
  server_service_name = "nomad"
  client_service_name = "nomad-client"
  auto_advertise      = true
  server_auto_join    = true
  client_auto_join    = true
}

# Mutual TLS for Nomad's own HTTP API and RPC traffic.
tls {
  http                   = true
  rpc                    = true
  ca_file                = "/opt/nomad/tls/nomad-agent-ca.pem"
  cert_file              = "/opt/nomad/tls/global-server-nomad.pem"
  key_file               = "/opt/nomad/tls/global-server-nomad-key.pem"
  verify_server_hostname = true
  verify_https_client    = true
}

# Nomad's web UI block is named `ui` (`ui_config` is the Consul spelling).
ui {
  enabled = true
}

telemetry {
  collection_interval        = "1s"
  disable_hostname           = true
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}
Configure Nomad client (on client nodes)
Create the Nomad client configuration at /etc/nomad.d/nomad.hcl on each node that will run workloads.
# Nomad client agent configuration (/etc/nomad.d/nomad.hcl on client nodes).
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
log_level  = "INFO"
bind_addr  = "0.0.0.0"

server {
  enabled = false
}

client {
  enabled    = true
  servers    = ["203.0.113.10:4647", "203.0.113.11:4647", "203.0.113.12:4647"]
  node_class = "worker"

  options {
    "driver.raw_exec.enable" = "1"
    "driver.docker.enable"   = "1"
  }
}

# Integration with the local Consul agent. The certificate paths use the file
# names that `consul tls ca create` and `consul tls cert create -client -dc dc1`
# write into /opt/consul/tls.
# NOTE(review): ssl = true requires the Consul agent to serve HTTPS on this
# address, and the Consul configuration in this guide sets no `ports` block -
# confirm the listener before relying on it.
# NOTE(review): the Consul TLS step makes these files mode 600 owned by
# consul, while the Nomad unit runs as user nomad - confirm read access.
consul {
  address          = "127.0.0.1:8500"
  ssl              = true
  ca_file          = "/opt/consul/tls/consul-agent-ca.pem"
  cert_file        = "/opt/consul/tls/dc1-client-consul-0.pem"
  key_file         = "/opt/consul/tls/dc1-client-consul-0-key.pem"
  auto_advertise   = true
  client_auto_join = true
}

# Mutual TLS for Nomad's own HTTP API and RPC traffic.
tls {
  http                   = true
  rpc                    = true
  ca_file                = "/opt/nomad/tls/nomad-agent-ca.pem"
  cert_file              = "/opt/nomad/tls/global-client-nomad.pem"
  key_file               = "/opt/nomad/tls/global-client-nomad-key.pem"
  verify_server_hostname = true
  verify_https_client    = true
}

telemetry {
  collection_interval        = "1s"
  disable_hostname           = true
  prometheus_metrics         = true
  publish_allocation_metrics = true
  publish_node_metrics       = true
}
Install Docker on client nodes
Install Docker to run containerized workloads on Nomad clients.
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
sudo usermod -aG docker nomad
sudo systemctl enable --now docker
Configure systemd services
Create systemd service files for Consul and Nomad with proper dependencies.
[Unit]
Description=Consul
Documentation=https://www.consul.io/
Requires=network-online.target
After=network-online.target
ConditionFileNotEmpty=/etc/consul.d/consul.hcl
[Service]
Type=notify
User=consul
Group=consul
ExecStart=/usr/bin/consul agent -config-dir=/etc/consul.d/
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
Create Nomad systemd service
Configure the Nomad systemd service with proper startup order.
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/docs/
Requires=network-online.target
After=network-online.target consul.service
Wants=consul.service
ConditionFileNotEmpty=/etc/nomad.d/nomad.hcl
[Service]
Type=notify
User=nomad
Group=nomad
ExecStart=/usr/bin/nomad agent -config=/etc/nomad.d/nomad.hcl
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
LimitNOFILE=65536
KillSignal=SIGINT
[Install]
WantedBy=multi-user.target
Start the services
Enable and start Consul first, then Nomad to ensure proper cluster formation.
sudo systemctl daemon-reload
sudo systemctl enable --now consul
sudo systemctl status consul
sleep 10
sudo systemctl enable --now nomad
sudo systemctl status nomad
Configure firewall rules
Open required ports
Configure firewall rules for Consul and Nomad cluster communication.
# Keep remote administration reachable: once enabled, ufw's default policy
# denies incoming traffic, so SSH must be allowed before the firewall is
# switched on or the current session's host becomes unreachable.
sudo ufw allow 22/tcp comment 'SSH'
sudo ufw allow 8300/tcp comment 'Consul server RPC'
sudo ufw allow 8301/tcp comment 'Consul serf LAN'
sudo ufw allow 8301/udp comment 'Consul serf LAN'
sudo ufw allow 8302/tcp comment 'Consul serf WAN'
sudo ufw allow 8302/udp comment 'Consul serf WAN'
sudo ufw allow 8500/tcp comment 'Consul HTTP API'
sudo ufw allow 8600/tcp comment 'Consul DNS'
sudo ufw allow 8600/udp comment 'Consul DNS'
sudo ufw allow 4646/tcp comment 'Nomad HTTP API'
sudo ufw allow 4647/tcp comment 'Nomad RPC'
# Nomad's serf gossip uses both TCP and UDP on 4648.
sudo ufw allow 4648/tcp comment 'Nomad serf'
sudo ufw allow 4648/udp comment 'Nomad serf'
sudo ufw allow 20000:32000/tcp comment 'Nomad dynamic ports'
sudo ufw --force enable
Deploy and monitor workloads
Deploy a sample application
Create a simple Nomad job to test cluster functionality. Save it as /tmp/nginx-example.nomad, the path used by the submit command in the next step.
job "nginx-example" {
datacenters = ["dc1"]
type = "service"
group "web" {
count = 3
network {
port "http" {
static = 8080
to = 80
}
}
service {
name = "nginx-web"
port = "http"
tags = ["web", "nginx"]
check {
type = "http"
path = "/"
interval = "10s"
timeout = "3s"
}
}
task "nginx" {
driver = "docker"
config {
image = "nginx:alpine"
ports = ["http"]
}
resources {
cpu = 100
memory = 128
}
}
}
}
Submit the job
Deploy the application using the Nomad CLI with TLS certificates.
export NOMAD_ADDR=https://203.0.113.10:4646
export NOMAD_CACERT=/opt/nomad/tls/nomad-agent-ca.pem
export NOMAD_CLIENT_CERT=/opt/nomad/tls/global-client-nomad.pem
export NOMAD_CLIENT_KEY=/opt/nomad/tls/global-client-nomad-key.pem
nomad job run /tmp/nginx-example.nomad
nomad job status nginx-example
Configure service monitoring
Set up Prometheus configuration to scrape Nomad and Consul metrics. You can integrate this with Prometheus and Grafana monitoring for comprehensive observability.
# Prometheus scrape configuration for the Nomad and Consul agents.
# Targets are discovered through the Consul catalog; the tls_config entries
# use the file names produced by `consul tls ca create` and
# `consul tls cert create -client -dc dc1` in /opt/consul/tls.
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'nomad'
    consul_sd_configs:
      - server: '203.0.113.10:8500'
        scheme: https
        tls_config:
          ca_file: /opt/consul/tls/consul-agent-ca.pem
          cert_file: /opt/consul/tls/dc1-client-consul-0.pem
          key_file: /opt/consul/tls/dc1-client-consul-0-key.pem
        services: ['nomad']
    relabel_configs:
      - source_labels: [__meta_consul_service]
        target_label: job
    # Nomad exposes Prometheus-formatted metrics at /v1/metrics?format=prometheus,
    # not at Prometheus' default /metrics path.
    # NOTE(review): the Nomad agents in this guide enable TLS with
    # verify_https_client, so the scrape itself likely also needs
    # `scheme: https` plus a tls_config with Nomad certificates - confirm.
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']

  - job_name: 'consul'
    consul_sd_configs:
      - server: '203.0.113.10:8500'
        scheme: https
        tls_config:
          ca_file: /opt/consul/tls/consul-agent-ca.pem
          cert_file: /opt/consul/tls/dc1-client-consul-0.pem
          key_file: /opt/consul/tls/dc1-client-consul-0-key.pem
        services: ['consul']
    metrics_path: /v1/agent/metrics
    params:
      format: ['prometheus']
Verify your setup
# Check cluster status
nomad server members
nomad node status
consul members
Verify TLS is working
curl -k https://203.0.113.10:4646/v1/status/leader
curl -k https://203.0.113.10:8500/v1/status/leader
Check job status
nomad job status
nomad alloc status
Test service discovery
consul catalog services
consul catalog nodes
Access the web interfaces at https://203.0.113.10:4646 for Nomad and https://203.0.113.10:8500 for Consul.
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Nodes not joining cluster | Firewall blocking ports | Check firewall rules and ensure all required ports are open |
| TLS certificate errors | Certificate paths wrong | Verify certificate files exist and have correct permissions (600) |
| Docker jobs failing | Docker not accessible | Add nomad user to docker group: sudo usermod -aG docker nomad |
| Consul health checks failing | Service registration issues | Check Consul logs: journalctl -u consul -f |
| High memory usage | Default resource limits | Tune GC settings in Nomad config: gc_interval = "1m" |
| Jobs not scheduling | No eligible nodes | Check node eligibility: nomad node status -verbose |
Next steps
- Advanced Nomad job templates and deployment strategies with rolling updates
- Integrate Consul with Kubernetes service discovery for hybrid workloads
- Configure advanced Consul ACL policies for production security
- Set up Nomad autoscaling with Prometheus metrics
- Implement Nomad secrets management with Vault integration
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Strict mode: abort on a failing command (errexit), on use of an unset
# variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# ANSI colour escapes, kept as literal backslash sequences; they are rendered
# by the `echo -e` calls that print status messages.
NC='\033[0m'        # reset / no colour
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# usage: print the command-line help text to stdout, then terminate the
# script with exit status 1. It is invoked for -h/--help as well as for
# invalid invocations.
usage() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "Options:" \
    " -t, --type Node type: server|client (required)" \
    " -j, --join Comma-separated list of server IPs for retry_join" \
    " -h, --help Show this help message" \
    "" \
    "Examples:" \
    " $0 --type server --join 10.0.1.10,10.0.1.11,10.0.1.12" \
    " $0 --type client --join 10.0.1.10,10.0.1.11,10.0.1.12"
  exit 1
}
# cleanup: handler for bash's ERR trap. Prints a failure banner wrapped in
# the RED/NC colour codes, then stops and disables the nomad and consul
# units. systemctl's stderr is discarded and its failures are ignored so the
# handler itself never fails.
cleanup() {
  printf '%b\n' "${RED}[ERROR] Installation failed. Cleaning up...${NC}"
  local action
  for action in stop disable; do
    systemctl "$action" nomad consul 2>/dev/null || true
  done
}
# Register the handler on the ERR trap.
trap cleanup ERR
# Parse arguments
NODE_TYPE=""
JOIN_IPS=""

# parse_args: walk the given command-line words and fill the NODE_TYPE and
# JOIN_IPS globals.
# Arguments: the script's own arguments ("$@").
# Behaviour:
#   -t|--type VALUE   sets NODE_TYPE
#   -j|--join VALUE   sets JOIN_IPS
#   -h|--help         calls usage
#   anything else     prints an "Unknown option" error, then calls usage
# A flag given without its value prints an explicit error and calls usage,
# rather than dying on an unset "$2" under `set -u`.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -t|--type)
        if [[ $# -lt 2 ]]; then
          echo -e "${RED}Error: $1 requires a value${NC}"
          usage
        fi
        NODE_TYPE="$2"
        shift 2
        ;;
      -j|--join)
        if [[ $# -lt 2 ]]; then
          echo -e "${RED}Error: $1 requires a value${NC}"
          usage
        fi
        JOIN_IPS="$2"
        shift 2
        ;;
      -h|--help)
        usage
        ;;
      *)
        echo -e "${RED}Unknown option: $1${NC}"
        usage
        ;;
    esac
  done
}

parse_args "$@"
# Validate arguments
# NODE_TYPE must be exactly "server" or "client"; an empty value lands in the
# default arm as well.
case "$NODE_TYPE" in
  server|client) ;;
  *)
    echo -e "${RED}Error: Node type must be 'server' or 'client'${NC}"
    usage
    ;;
esac

# A join list is mandatory for both node types.
[[ -n "$JOIN_IPS" ]] || {
  echo -e "${RED}Error: Join IPs are required${NC}"
  usage
}

# Check if running as root or with sudo
if (( EUID != 0 )); then
  echo -e "${RED}This script must be run as root or with sudo${NC}"
  exit 1
fi
# Auto-detect distribution
# Reads /etc/os-release and sets three globals used by the later steps:
#   PKG_MGR     - "apt", "dnf" or "yum"
#   PKG_UPDATE  - command run (unquoted) to update the system
#   PKG_INSTALL - command prefix run (unquoted) to install packages
# Exits with status 1 when the file is missing or the distribution is not
# one of the supported IDs.

# pkg_update: refresh the apt package index, then upgrade installed packages.
# PKG_UPDATE stores this function's name for apt systems because the variable
# is executed via plain word splitting; a `&&` kept inside the string would
# be handed to apt as a literal argument instead of chaining two commands.
pkg_update() {
  apt update || return
  apt upgrade -y
}

if [ -f /etc/os-release ]; then
  . /etc/os-release
  # ${ID:-} keeps `set -u` from aborting on an os-release without ID.
  case "${ID:-}" in
    ubuntu|debian)
      PKG_MGR="apt"
      PKG_UPDATE="pkg_update"
      PKG_INSTALL="apt install -y"
      ;;
    almalinux|rocky|centos|rhel|ol|fedora)
      PKG_MGR="dnf"
      PKG_UPDATE="dnf update -y"
      PKG_INSTALL="dnf install -y"
      ;;
    amzn)
      PKG_MGR="yum"
      PKG_UPDATE="yum update -y"
      PKG_INSTALL="yum install -y"
      ;;
    *)
      echo -e "${RED}Unsupported distribution: ${ID:-unknown}${NC}"
      exit 1
      ;;
  esac
else
  echo -e "${RED}Cannot detect distribution${NC}"
  exit 1
fi
echo -e "${GREEN}Setting up Nomad cluster on ${PRETTY_NAME:-$ID}${NC}"
echo -e "${YELLOW}Node type: $NODE_TYPE${NC}"
# Step 1: Update system packages
printf '%b\n' "${GREEN}[1/10] Updating system packages...${NC}"
# PKG_UPDATE and PKG_INSTALL hold the command lines chosen during
# distribution detection; they are expanded unquoted on purpose so the shell
# splits them into command and arguments.
$PKG_UPDATE
$PKG_INSTALL curl unzip gpg
# Step 2: Install HashiCorp repository
echo -e "${GREEN}[2/10] Adding HashiCorp repository...${NC}"
if [[ "$PKG_MGR" == "apt" ]]; then
  # Download with curl, which step 1 installs; wget is not guaranteed to be
  # present. --batch --yes lets gpg overwrite an existing keyring on re-runs
  # instead of stopping to ask.
  curl -fsSL https://apt.releases.hashicorp.com/gpg \
    | gpg --batch --yes --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
  # Prefer the codename from /etc/os-release (sourced during distribution
  # detection); fall back to lsb_release only when it is not provided.
  hashicorp_codename="${VERSION_CODENAME:-$(lsb_release -cs)}"
  echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com ${hashicorp_codename} main" > /etc/apt/sources.list.d/hashicorp.list
  apt update
else
  # HashiCorp publishes a separate repo file per RPM distribution family.
  case "${ID:-}" in
    fedora) hashicorp_repo="https://rpm.releases.hashicorp.com/fedora/hashicorp.repo" ;;
    amzn)   hashicorp_repo="https://rpm.releases.hashicorp.com/AmazonLinux/hashicorp.repo" ;;
    *)      hashicorp_repo="https://rpm.releases.hashicorp.com/RHEL/hashicorp.repo" ;;
  esac
  if command -v dnf &> /dev/null; then
    # Best effort: provides `dnf config-manager`; may already be installed.
    $PKG_INSTALL dnf-plugins-core 2>/dev/null || true
    dnf config-manager --add-repo "$hashicorp_repo"
  else
    # Best effort: yum-utils provides yum-config-manager on yum-only hosts.
    $PKG_INSTALL yum-utils 2>/dev/null || true
    yum-config-manager --add-repo "$hashicorp_repo"
  fi
fi
# Step 3: Install Nomad and Consul
printf '%b\n' "${GREEN}[3/10] Installing Nomad and Consul...${NC}"
$PKG_INSTALL nomad consul

# Step 4: Create directories
printf '%b\n' "${GREEN}[4/10] Creating directories...${NC}"
# Configuration, data and TLS directories for both agents.
mkdir -p \
  /etc/nomad.d /opt/nomad/data /opt/nomad/tls \
  /etc/consul.d /opt/consul/data /opt/consul/tls
# Hand each tree to the matching service account.
chown -R nomad:nomad /etc/nomad.d /opt/nomad
chown -R consul:consul /etc/consul.d /opt/consul
# Step 5: Generate Consul encryption key and TLS certificates
echo -e "${GREEN}[5/10] Generating Consul TLS certificates and encryption key...${NC}"
cd /opt/consul/tls
# Agents only trust each other when their certificates come from the same CA.
# If consul-agent-ca.pem (with consul-agent-ca-key.pem) was already placed in
# this directory - e.g. copied from the first node - reuse it; otherwise
# create a new CA. Skipping creation also keeps re-runs from colliding with
# the existing CA files.
if [[ ! -f consul-agent-ca.pem ]]; then
  consul tls ca create
fi
# The generated consul.hcl references the "-0" certificate for this node
# type, so issue one only when it is not there yet.
if [[ "$NODE_TYPE" == "server" ]]; then
  consul_cert_flag="-server"
  consul_cert_file="dc1-server-consul-0.pem"
else
  consul_cert_flag="-client"
  consul_cert_file="dc1-client-consul-0.pem"
fi
if [[ ! -f "$consul_cert_file" ]]; then
  consul tls cert create "$consul_cert_flag" -dc dc1
fi
chown -R consul:consul /opt/consul/tls
chmod 600 /opt/consul/tls/*.pem
# Gossip encryption requires the same key on every agent. Honour a key passed
# in through the environment (e.g. `sudo CONSUL_ENCRYPT_KEY=... bash
# install.sh ...` on the second and later nodes) and generate a fresh one
# only when none was supplied.
CONSUL_ENCRYPT_KEY="${CONSUL_ENCRYPT_KEY:-$(consul keygen)}"
# Step 6: Generate Nomad TLS certificates
echo -e "${GREEN}[6/10] Generating Nomad TLS certificates...${NC}"
cd /opt/nomad/tls
# Nomad agents must share one CA to verify each other. Reuse
# nomad-agent-ca.pem (with nomad-agent-ca-key.pem) when it was already placed
# in this directory - e.g. copied from the first node - and create a new CA
# only when none is present. This also keeps re-runs from colliding with
# existing files.
if [[ ! -f nomad-agent-ca.pem ]]; then
  nomad tls ca create
fi
# The generated nomad.hcl references global-<type>-nomad.pem, so issue the
# certificate only when that file is missing.
if [[ "$NODE_TYPE" == "server" ]]; then
  nomad_cert_flag="-server"
  nomad_cert_file="global-server-nomad.pem"
else
  nomad_cert_flag="-client"
  nomad_cert_file="global-client-nomad.pem"
fi
if [[ ! -f "$nomad_cert_file" ]]; then
  nomad tls cert create "$nomad_cert_flag" -region global -dc dc1
fi
chown -R nomad:nomad /opt/nomad/tls
chmod 600 /opt/nomad/tls/*.pem
# Step 7: Configure Consul
# Writes /etc/consul.d/consul.hcl for the selected node type. The shell
# expands $JOIN_CONFIG and $CONSUL_ENCRYPT_KEY while writing the
# here-documents; everything else in them is literal HCL.
echo -e "${GREEN}[7/10] Configuring Consul...${NC}"
# Split the comma-separated --join value into an array.
IFS=',' read -ra JOIN_ARRAY <<< "$JOIN_IPS"
# Build an HCL list literal: each address is wrapped in double quotes and
# followed by ", ".
JOIN_CONFIG=""
for ip in "${JOIN_ARRAY[@]}"; do
JOIN_CONFIG="${JOIN_CONFIG}\"${ip}\", "
done
# Strip the trailing ", " and add the surrounding brackets.
JOIN_CONFIG="[${JOIN_CONFIG%%, }]"
if [[ "$NODE_TYPE" == "server" ]]; then
# Server agent: server mode, bootstrap_expect of 3, UI enabled, client_addr
# set to 0.0.0.0. In an unquoted here-document the backslash before a double
# quote is kept, so the file receives \"eth0\" verbatim.
cat > /etc/consul.d/consul.hcl << EOF
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
server = true
bootstrap_expect = 3
ui_config {
enabled = true
}
bind_addr = "{{ GetInterfaceIP \"eth0\" }}"
client_addr = "0.0.0.0"
retry_join = $JOIN_CONFIG
encrypt = "$CONSUL_ENCRYPT_KEY"
ca_file = "/opt/consul/tls/consul-agent-ca.pem"
cert_file = "/opt/consul/tls/dc1-server-consul-0.pem"
key_file = "/opt/consul/tls/dc1-server-consul-0-key.pem"
verify_incoming = true
verify_outgoing = true
verify_server_hostname = true
connect {
enabled = true
}
EOF
else
# Client agent: same join list and gossip key, with the client certificate
# and no server-only settings.
cat > /etc/consul.d/consul.hcl << EOF
datacenter = "dc1"
data_dir = "/opt/consul/data"
log_level = "INFO"
server = false
bind_addr = "{{ GetInterfaceIP \"eth0\" }}"
retry_join = $JOIN_CONFIG
encrypt = "$CONSUL_ENCRYPT_KEY"
ca_file = "/opt/consul/tls/consul-agent-ca.pem"
cert_file = "/opt/consul/tls/dc1-client-consul-0.pem"
key_file = "/opt/consul/tls/dc1-client-consul-0-key.pem"
verify_incoming = true
verify_outgoing = true
connect {
enabled = true
}
EOF
fi
# The file contains the gossip key: owner read/write, group read, no access
# for others.
chown consul:consul /etc/consul.d/consul.hcl
chmod 640 /etc/consul.d/consul.hcl
# Step 8: Configure Nomad
# Writes /etc/nomad.d/nomad.hcl for the selected node type. Neither
# here-document contains shell variables, so both are written as literal HCL.
echo -e "${GREEN}[8/10] Configuring Nomad...${NC}"
if [[ "$NODE_TYPE" == "server" ]]; then
# Server agent: server mode with bootstrap_expect of 3. No join addresses are
# written; the consul block enables server_auto_join and client_auto_join
# through the local Consul agent at 127.0.0.1:8500.
cat > /etc/nomad.d/nomad.hcl << EOF
datacenter = "dc1"
data_dir = "/opt/nomad/data"
log_level = "INFO"
server {
enabled = true
bootstrap_expect = 3
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
tls {
http = true
rpc = true
ca_file = "/opt/nomad/tls/nomad-agent-ca.pem"
cert_file = "/opt/nomad/tls/global-server-nomad.pem"
key_file = "/opt/nomad/tls/global-server-nomad-key.pem"
verify_server_hostname = true
verify_https_client = true
}
EOF
else
# Client agent: client mode only, same consul block, with the client
# certificate pair in the tls block.
cat > /etc/nomad.d/nomad.hcl << EOF
datacenter = "dc1"
data_dir = "/opt/nomad/data"
log_level = "INFO"
client {
enabled = true
}
consul {
address = "127.0.0.1:8500"
server_service_name = "nomad"
client_service_name = "nomad-client"
auto_advertise = true
server_auto_join = true
client_auto_join = true
}
tls {
http = true
rpc = true
ca_file = "/opt/nomad/tls/nomad-agent-ca.pem"
cert_file = "/opt/nomad/tls/global-client-nomad.pem"
key_file = "/opt/nomad/tls/global-client-nomad-key.pem"
verify_server_hostname = true
verify_https_client = true
}
EOF
fi
# Owner read/write, group read, no access for others.
chown nomad:nomad /etc/nomad.d/nomad.hcl
chmod 640 /etc/nomad.d/nomad.hcl
# Step 9: Start and enable services
printf '%b\n' "${GREEN}[9/10] Starting and enabling services...${NC}"
systemctl enable consul nomad
# Consul is started first; Nomad follows after a fixed 10-second pause.
systemctl start consul
sleep 10
systemctl start nomad

# Step 10: Verify installation
printf '%b\n' "${GREEN}[10/10] Verifying installation...${NC}"
sleep 5

# require_active UNIT LABEL: print a green tick line when the systemd unit is
# active; otherwise print a red cross line and exit the script with status 1.
require_active() {
  if ! systemctl is-active --quiet "$1"; then
    printf '%b\n' "${RED}✗ $2 is not running${NC}"
    exit 1
  fi
  printf '%b\n' "${GREEN}✓ $2 is running${NC}"
}
require_active consul Consul
require_active nomad Nomad

# Final summary, including the gossip key in use on this node.
printf '%b\n' "${GREEN}Installation completed successfully!${NC}"
printf '%b\n' "${YELLOW}Important information:${NC}"
printf '%b\n' "${YELLOW}Consul encryption key: $CONSUL_ENCRYPT_KEY${NC}"
printf '%b\n' "${YELLOW}Save this key - you'll need it for other nodes!${NC}"
printf '%s\n' ""
printf '%b\n' "${GREEN}Next steps:${NC}"
# UI addresses are shown on server nodes only; the first address reported by
# `hostname -I` is used.
if [[ "$NODE_TYPE" == "server" ]]; then
  printf '%s\n' "- Consul UI: https://$(hostname -I | awk '{print $1}'):8500"
  printf '%s\n' "- Nomad UI: https://$(hostname -I | awk '{print $1}'):4646"
fi
printf '%s\n' "- Check status: systemctl status consul nomad"
printf '%s\n' "- View logs: journalctl -u consul -u nomad -f"
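Review the script before running. It must be run as root and requires the node type and the list of server IPs, for example: sudo bash install.sh --type server --join 10.0.1.10,10.0.1.11,10.0.1.12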