Deploy a complete Prometheus and Grafana monitoring solution using Docker Compose with persistent storage, custom dashboards, and Alertmanager integration for production-ready observability.
Prerequisites
- Docker and Docker Compose installed
- Root or sudo access
- At least 4GB RAM available
- Open ports 3000, 9090, 9093
What this solves
A monitoring stack gives you visibility into your infrastructure and applications before problems become outages. This tutorial sets up Prometheus for metrics collection, Grafana for visualization, and Alertmanager for notifications using Docker Compose, creating a production-ready monitoring solution in under 30 minutes.
Step-by-step installation
Update system packages and install dependencies
Start by updating your system and installing Docker and Docker Compose if not already present.
# Refresh the package index, then apply pending upgrades.
sudo apt update && sudo apt upgrade --yes
# Install the Docker engine and the Compose v2 plugin.
sudo apt install --yes docker.io docker-compose-v2
# Start Docker now and enable it at boot.
sudo systemctl enable --now docker
# Add the current user to the docker group.
sudo usermod --append --groups docker "$USER"
Log out and back in for the Docker group changes to take effect.
Create the project directory structure
Set up directories for configuration files, data persistence, and the Docker Compose setup.
# One sub-directory per component that gets its own configuration.
mkdir -p ~/monitoring-stack/prometheus \
         ~/monitoring-stack/grafana \
         ~/monitoring-stack/alertmanager
cd ~/monitoring-stack
Configure Prometheus
Create the Prometheus configuration file with basic scraping targets and Alertmanager integration.
# prometheus/prometheus.yml
# Scrape targets and evaluate rules every 15 seconds.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Alerting rules are loaded from alert_rules.yml.
rule_files:
  - "alert_rules.yml"

# Alerts are sent to the Alertmanager service on port 9093.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "alertmanager:9093"

# One job per component of the stack; Prometheus also scrapes itself.
scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "localhost:9090"

  - job_name: "node-exporter"
    static_configs:
      - targets:
          - "node-exporter:9100"

  - job_name: "cadvisor"
    static_configs:
      - targets:
          - "cadvisor:8080"

  - job_name: "alertmanager"
    static_configs:
      - targets:
          - "alertmanager:9093"
Create Prometheus alert rules
Set up basic alerting rules for system monitoring and container health.
# prometheus/alert_rules.yml
# Host-level alerts (CPU, memory, disk) plus a generic "scrape target down" alert.
groups:
  - name: system_alerts
    rules:
      # rate() is used instead of irate(): irate() only looks at the last two
      # samples in the window, which makes an alert flap on short spikes.
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage detected on {{ $labels.instance }}"
          description: "CPU usage is above 80% for more than 2 minutes on {{ $labels.instance }}"

      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage detected on {{ $labels.instance }}"
          description: "Memory usage is above 85% for more than 2 minutes on {{ $labels.instance }}"

      # tmpfs filesystems are excluded on both sides of the ratio.
      - alert: DiskSpaceLow
        expr: (1 - (node_filesystem_avail_bytes{fstype!="tmpfs"} / node_filesystem_size_bytes{fstype!="tmpfs"})) * 100 > 90
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Disk space low on {{ $labels.instance }}"
          description: "Disk usage is above 90% on filesystem {{ $labels.mountpoint }}"

      # `up == 0` matches any scrape target Prometheus failed to scrape.
      - alert: ContainerDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.job }} is down"
          description: "Container {{ $labels.job }} on {{ $labels.instance }} has been down for more than 1 minute"
Configure Alertmanager
Set up Alertmanager to handle alert routing and notifications via email and Slack.
# alertmanager/alertmanager.yml
# Routes every alert to a single receiver that sends email (Slack optional).
global:
  # Replace with your real SMTP relay. "localhost" would resolve to the
  # Alertmanager container itself, where no mail server is running.
  smtp_smarthost: 'smtp.example.com:587'
  smtp_from: 'alerts@example.com'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'

receivers:
  - name: 'web.hook'
    email_configs:
      - to: 'admin@example.com'
        # email_config has no `subject`/`body` keys; the subject is set through
        # `headers` and the plain-text body through `text`. Unknown keys make
        # Alertmanager reject the configuration at startup.
        headers:
          Subject: 'Alert: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Description: {{ .Annotations.description }}
          {{ end }}
    # Uncomment and configure for Slack notifications
    # slack_configs:
    #   - api_url: 'YOUR_SLACK_WEBHOOK_URL'
    #     channel: '#alerts'
    #     title: 'Alert: {{ .GroupLabels.alertname }}'
    #     text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'

# A firing critical alert mutes warning alerts that share the labels in `equal`.
# `source_matchers`/`target_matchers` replace the deprecated
# `source_match`/`target_match` keys.
inhibit_rules:
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'dev', 'instance']
Create Grafana configuration
Set up Grafana with automatic Prometheus datasource provisioning and default dashboards.
# Provisioning directories for datasources and dashboard providers, plus the
# directory that holds the dashboard JSON files.
mkdir -p ~/monitoring-stack/grafana/provisioning/datasources \
         ~/monitoring-stack/grafana/provisioning/dashboards \
         ~/monitoring-stack/grafana/dashboards
# Grafana datasource provisioning: registers Prometheus as the default,
# editable datasource, reached at the compose service name.
apiVersion: 1

datasources:
  - name: "Prometheus"
    type: "prometheus"
    access: "proxy"
    url: "http://prometheus:9090"
    isDefault: true
    editable: true
# Grafana dashboard provisioning: a file-based provider that loads dashboards
# from /var/lib/grafana/dashboards and re-checks the directory every 10 seconds.
apiVersion: 1

providers:
  - name: "Default"
    orgId: 1
    folder: ""
    folderUid: ""
    type: "file"
    disableDeletion: false
    editable: true
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: "/var/lib/grafana/dashboards"
Download pre-built dashboards
Get popular Grafana dashboards for system monitoring and Docker container metrics.
cd ~/monitoring-stack/grafana/dashboards
# curl flags: -f fails on HTTP errors instead of saving the error page as a
# .json file, -L follows redirects, -sS stays quiet but still prints errors.

# Node Exporter Full dashboard
curl -fsSL -o node-exporter-full.json "https://grafana.com/api/dashboards/1860/revisions/37/download"
# Docker Container & Host Metrics dashboard
curl -fsSL -o docker-monitoring.json "https://grafana.com/api/dashboards/193/revisions/5/download"
# Prometheus Stats dashboard
curl -fsSL -o prometheus-stats.json "https://grafana.com/api/dashboards/2/revisions/2/download"
Create the Docker Compose configuration
Define all services including Prometheus, Grafana, Alertmanager, and monitoring exporters.
# docker-compose.yml
# Prometheus, Grafana, Alertmanager, node-exporter and cAdvisor on one bridge
# network, with named volumes for the stateful services.
version: '3.8'

networks:
  monitoring:
    driver: bridge

volumes:
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}

services:
  prometheus:
    image: prom/prometheus:v2.48.0
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      # ./prometheus is bind-mounted over /etc/prometheus, which hides the
      # console directories the image links there; point at their real location.
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      # Required for `curl -X POST http://localhost:9090/-/reload`.
      - '--web.enable-lifecycle'
      # --web.enable-admin-api is deliberately not set: port 9090 is published
      # without authentication and nothing in this setup uses the admin API.
    networks:
      - monitoring

  grafana:
    image: grafana/grafana:10.2.2
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards
    environment:
      # Override by exporting GRAFANA_ADMIN_PASSWORD before `docker compose up`;
      # the default matches the credentials used elsewhere in this guide.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123!}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_INSTALL_PLUGINS=grafana-piechart-panel
    networks:
      - monitoring

  alertmanager:
    image: prom/alertmanager:v0.26.0
    container_name: alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager:/etc/alertmanager
      - alertmanager_data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.external-url=http://localhost:9093'
    networks:
      - monitoring

  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      # Tell the exporter that the host root filesystem is mounted at /rootfs.
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is Compose's escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - monitoring

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: cadvisor
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg
    networks:
      - monitoring
Set proper file permissions
Ensure the monitoring stack can read configuration files and write to data directories.
cd ~/monitoring-stack
# Set ownership for Grafana (runs as user ID 472)
sudo chown -R 472:472 grafana/
# Set proper permissions for configuration files.
# Directories get 755 and files get 644. A plain `chmod -R 644` would also strip
# the execute bit from the directories themselves, making them untraversable.
find prometheus alertmanager -type d -exec chmod 755 {} +
find prometheus alertmanager -type f -exec chmod 644 {} +
# Make sure Docker can read the compose file
chmod 644 docker-compose.yml
Launch the monitoring stack
Start all services and verify they come up healthy.
# Start every service in the background.
docker compose up --detach
# Check that all containers are running
docker compose ps
# View logs to ensure everything started properly
docker compose logs --follow
Configure Grafana dashboards and alerts
Access Grafana web interface
Open Grafana in your browser and set up the initial configuration.
# Open Grafana at http://localhost:3000
Default credentials: admin / admin123!
Navigate to Dashboards to see the pre-loaded system monitoring dashboards. The Node Exporter Full dashboard provides comprehensive system metrics, while the Docker monitoring dashboard shows container performance.
Create custom dashboard
Build a custom dashboard for application-specific metrics and alerts.
- Click + Create Dashboard in Grafana
- Add a new panel and select Prometheus as the data source
- Use queries like `up` to show service availability or `rate(prometheus_http_requests_total[5m])` for request rates
- Configure alert rules by clicking the Alert tab in the panel editor
- Set conditions like `IS BELOW 1` for the `up` metric to alert when services go down
Configure notification channels
Set up Grafana to send alerts via email, Slack, or webhooks.
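- Go to Alerting → Contact points (named "Notification channels" in Grafana's legacy alerting)
- Click Add contact point (Add channel in legacy alerting) and choose your preferred method
- For email: Configure SMTP settings in `/etc/grafana/grafana.ini`, or set the equivalent `GF_SMTP_*` environment variables on the grafana service in docker-compose.yml
- For Slack: Add your webhook URL and choose the channel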
- Test the notification to ensure delivery works
Set up monitoring targets
Add external targets to Prometheus
Configure Prometheus to monitor additional services and applications.
# Add these jobs under `scrape_configs:` in your existing prometheus.yml
  - job_name: "nginx"
    static_configs:
      - targets:
          - "203.0.113.10:9113"  # nginx-prometheus-exporter

  - job_name: "mysql"
    static_configs:
      - targets:
          - "203.0.113.11:9104"  # mysqld_exporter

  - job_name: "redis"
    static_configs:
      - targets:
          - "203.0.113.12:9121"  # redis_exporter

  - job_name: "blackbox"
    metrics_path: /probe
    params:
      module:
        - http_2xx  # Look for a HTTP 200 response.
    static_configs:
      - targets:
          - https://example.com  # Target to probe
          - https://api.example.com
    relabel_configs:
      # The listed URL becomes the `target` query parameter of the probe.
      - source_labels:
          - __address__
        target_label: __param_target
      # The same URL is kept as the `instance` label.
      - source_labels:
          - __param_target
        target_label: instance
      # The scrape itself is sent to the blackbox exporter.
      - target_label: __address__
        replacement: blackbox-exporter:9115
Reload Prometheus configuration without restarting:
# Ask the running Prometheus to re-read its configuration.
curl --request POST http://localhost:9090/-/reload
Add blackbox exporter for uptime monitoring
Monitor website and API availability with HTTP probes.
# -p keeps the step re-runnable: no error if the directory already exists.
mkdir -p ~/monitoring-stack/blackbox-exporter
# blackbox-exporter/blackbox.yml
# Probe modules: HTTP GET, HTTP POST with a JSON body, and a plain TCP connect.
modules:
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions:
        - "HTTP/1.1"
        - "HTTP/2.0"
      # An empty list falls back to the exporter's default (2xx).
      valid_status_codes: []
      method: GET
      follow_redirects: true
      preferred_ip_protocol: "ip4"

  http_post_2xx:
    prober: http
    timeout: 5s
    http:
      method: POST
      headers:
        Content-Type: application/json
      body: '{"test": "data"}'

  tcp_connect:
    prober: tcp
    timeout: 5s
Add the blackbox exporter service to your docker-compose.yml:
# Add this service under `services:` in your existing docker-compose.yml
  blackbox-exporter:
    image: "prom/blackbox-exporter:v0.24.0"
    container_name: "blackbox-exporter"
    restart: "unless-stopped"
    ports:
      - "9115:9115"
    volumes:
      # The directory holding blackbox.yml, mounted where the flag below expects it.
      - "./blackbox-exporter:/etc/blackbox_exporter"
    command:
      - "--config.file=/etc/blackbox_exporter/blackbox.yml"
    networks:
      - "monitoring"
Verify your setup
Check that all components are working correctly and collecting metrics.
# Verify all containers are running
docker compose ps
# Check Prometheus targets
curl http://localhost:9090/api/v1/targets
# Test Grafana dashboard
# Credentials are passed with -u in single quotes: an unquoted `!` in the URL
# triggers history expansion in interactive bash ("event not found").
curl -s -u 'admin:admin123!' http://localhost:3000/api/health
# Verify Alertmanager is up (API v2; the v1 API is removed from Alertmanager 0.27)
curl http://localhost:9093/api/v2/status
# Check node-exporter metrics
curl http://localhost:9100/metrics | head -20
# Test a Prometheus query
curl 'http://localhost:9090/api/v1/query?query=up'
Access the web interfaces to verify everything is working:
- Prometheus: `http://localhost:9090`
- Grafana: `http://localhost:3000` (admin/admin123!)
- Alertmanager: `http://localhost:9093`
- Node Exporter: `http://localhost:9100/metrics`
- cAdvisor: `http://localhost:8080`
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Grafana permission denied errors | Wrong ownership of data directory | sudo chown -R 472:472 grafana/ |
| Prometheus can't scrape targets | Network connectivity or firewall | Check docker network ls and service availability |
| Containers keep restarting | Configuration syntax errors | Check logs with docker compose logs [service] |
| Dashboards not loading | Datasource not configured | Verify Prometheus datasource in Grafana settings |
| Alerts not triggering | Alert rules syntax or thresholds | Test expressions in Prometheus web UI |
| High memory usage | Prometheus retention settings | Adjust --storage.tsdb.retention.time in compose file |
| Missing metrics | Exporter not running or misconfigured | Verify target health in Prometheus targets page |
Next steps
- Set up Kubernetes monitoring with Prometheus Operator for container orchestration monitoring
- Configure advanced Grafana dashboards and alerting with Prometheus integration for enhanced visualization
- Set up Alertmanager with email and Slack notifications for monitoring alerts for comprehensive alerting
- Monitor nginx performance with Prometheus and Grafana using nginx-prometheus-exporter for web server monitoring
- Configure Prometheus long-term storage with Thanos for unlimited data retention for enterprise storage
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Abort on a failing command, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

# Where the stack's configuration and compose file are written.
INSTALL_DIR="${HOME}/monitoring-stack"
COMPOSE_FILE="${INSTALL_DIR}/docker-compose.yml"

# ANSI colour escapes; they are interpreted by `echo -e` at print time.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
# Print the command-line synopsis to stdout and terminate with status 1.
usage() {
    cat << USAGE
Usage: $0 [--email admin@example.com] [--slack-webhook URL]
 --email: Email address for alerts (optional)
 --slack-webhook: Slack webhook URL for notifications (optional)
USAGE
    exit 1
}
# Record, before anything is created, whether the install directory was already
# there. A directory that existed before this run is never deleted on failure.
if [ -e "$INSTALL_DIR" ]; then
    INSTALL_DIR_PREEXISTED=1
else
    INSTALL_DIR_PREEXISTED=0
fi

# ERR handler: report the failure, stop any containers that were started, and
# remove the install directory only if this run created it. Always exits 1.
cleanup() {
    echo -e "${RED}[ERROR] Installation failed. Cleaning up...${NC}"
    if [ -d "$INSTALL_DIR" ]; then
        # Best effort, in a subshell so the caller's working directory is untouched.
        (cd "$INSTALL_DIR" && docker compose down) 2>/dev/null || true
        if [ "$INSTALL_DIR_PREEXISTED" -eq 0 ]; then
            # Leave the directory before deleting it.
            cd /
            rm -rf "$INSTALL_DIR"
        else
            echo -e "${YELLOW}${INSTALL_DIR} existed before this run and was left in place${NC}"
        fi
    fi
    exit 1
}
trap cleanup ERR
# Parse arguments.
# --email and --slack-webhook each take one value; both are optional.
EMAIL_ALERT=""
SLACK_WEBHOOK=""
while [[ $# -gt 0 ]]; do
    case "$1" in
        --email)
            # Without this check a missing value aborts under `set -u` with a
            # bare "unbound variable" error instead of the usage text.
            if [[ $# -lt 2 ]]; then
                echo -e "${RED}Option --email requires a value${NC}"
                usage
            fi
            EMAIL_ALERT="$2"
            shift 2
            ;;
        --slack-webhook)
            if [[ $# -lt 2 ]]; then
                echo -e "${RED}Option --slack-webhook requires a value${NC}"
                usage
            fi
            SLACK_WEBHOOK="$2"
            shift 2
            ;;
        -h|--help)
            # usage always exits 1; run it in a subshell so an explicit help
            # request can finish with a success status.
            (usage) || true
            exit 0
            ;;
        *)
            echo -e "${RED}Unknown option: $1${NC}"
            usage
            ;;
    esac
done
echo -e "${GREEN}[1/8] Detecting system and checking prerequisites...${NC}"

# The script must be started by a regular user; privileged steps use sudo.
if (( EUID == 0 )); then
    echo -e "${RED}Please run this script as a regular user, not root${NC}"
    exit 1
fi

# If sudo cannot run non-interactively, prompt for credentials once up front.
if ! sudo -n true 2>/dev/null; then
    echo -e "${YELLOW}This script requires sudo access${NC}"
    sudo -v
fi

# Pick package-manager commands and package names from the distribution ID.
if [ ! -f /etc/os-release ]; then
    echo -e "${RED}Cannot detect distribution${NC}"
    exit 1
fi
source /etc/os-release
case "${ID}" in
    ubuntu | debian)
        PKG_MGR="apt"
        PKG_UPDATE="apt update"
        PKG_INSTALL="apt install -y"
        DOCKER_PKG="docker.io"
        COMPOSE_PKG="docker-compose-v2"
        ;;
    almalinux | rocky | centos | rhel | ol | fedora)
        PKG_MGR="dnf"
        PKG_UPDATE="dnf update -y"
        PKG_INSTALL="dnf install -y"
        DOCKER_PKG="docker"
        COMPOSE_PKG="docker-compose"
        ;;
    amzn)
        PKG_MGR="yum"
        PKG_UPDATE="yum update -y"
        PKG_INSTALL="yum install -y"
        DOCKER_PKG="docker"
        COMPOSE_PKG="docker-compose"
        ;;
    *)
        echo -e "${RED}Unsupported distribution: $ID${NC}"
        exit 1
        ;;
esac
echo -e "${GREEN}[2/8] Updating system packages...${NC}"
# PKG_UPDATE / PKG_INSTALL are left unquoted on purpose: each holds a command
# plus its flags and must be split into words.
sudo $PKG_UPDATE

echo -e "${GREEN}[3/8] Installing Docker and Docker Compose...${NC}"
if ! command -v docker &> /dev/null; then
    sudo $PKG_INSTALL $DOCKER_PKG
    sudo systemctl enable --now docker
fi

# Ensure docker group membership even when Docker was already installed;
# the final step starts the stack through `newgrp docker`.
if getent group docker > /dev/null && ! id -nG "$USER" | grep -qw docker; then
    sudo usermod -aG docker "$USER"
fi

# The stack is started with the v2 plugin (`docker compose`), so a legacy
# standalone `docker-compose` binary does not satisfy this check.
if ! docker compose version &> /dev/null; then
    sudo $PKG_INSTALL $COMPOSE_PKG
fi
echo -e "${GREEN}[4/8] Creating directory structure...${NC}"
# One sub-directory per component; later steps use paths relative to INSTALL_DIR.
mkdir -p "${INSTALL_DIR}/prometheus" \
         "${INSTALL_DIR}/grafana" \
         "${INSTALL_DIR}/alertmanager"
cd "${INSTALL_DIR}"
echo -e "${GREEN}[5/8] Creating Prometheus configuration...${NC}"
# Both heredocs use a quoted delimiter so `$labels` and `{{ }}` are written
# verbatim. The YAML nesting below is significant and must be kept.
cat > prometheus/prometheus.yml << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "alert_rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: 'cadvisor'
    static_configs:
      - targets: ['cadvisor:8080']

  - job_name: 'alertmanager'
    static_configs:
      - targets: ['alertmanager:9093']
EOF

# HighCPUUsage uses rate() rather than irate(): irate() only considers the last
# two samples, which makes an alert flap on short spikes.
cat > prometheus/alert_rules.yml << 'EOF'
groups:
  - name: system_alerts
    rules:
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage detected on {{ $labels.instance }}"
          description: "CPU usage is above 80% for more than 2 minutes on {{ $labels.instance }}"

      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage detected on {{ $labels.instance }}"
          description: "Memory usage is above 85% for more than 2 minutes on {{ $labels.instance }}"

      - alert: DiskSpaceLow
        expr: (1 - (node_filesystem_avail_bytes{fstype!="tmpfs"} / node_filesystem_size_bytes{fstype!="tmpfs"})) * 100 > 90
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Disk space low on {{ $labels.instance }}"
          description: "Disk usage is above 90% on filesystem {{ $labels.mountpoint }}"

      - alert: ContainerDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.job }} is down"
          description: "Container {{ $labels.job }} on {{ $labels.instance }} has been down for more than 1 minute"
EOF
echo -e "${GREEN}[6/8] Creating Alertmanager configuration...${NC}"
# Unquoted heredocs: the shell expands EMAIL_ALERT / SLACK_WEBHOOK, while the
# `{{ ... }}` templates contain no `$` and are written through unchanged.
# NOTE(review): smtp_smarthost 'localhost:587' resolves inside the Alertmanager
# container; confirm that a mail relay is reachable there.
cat > alertmanager/alertmanager.yml << EOF
global:
  smtp_smarthost: 'localhost:587'
  smtp_from: '${EMAIL_ALERT:-alerts@example.com}'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'default-receiver'

receivers:
  - name: 'default-receiver'
EOF

# The optional sections are appended at the indentation of the receiver's keys
# so that they nest under 'default-receiver'.
if [ -n "$EMAIL_ALERT" ]; then
    # email_config has no `subject`/`body` keys; the subject is set through
    # `headers` and the plain-text body through `text`. Unknown keys make
    # Alertmanager reject the configuration at startup.
    cat >> alertmanager/alertmanager.yml << EOF
    email_configs:
      - to: '$EMAIL_ALERT'
        headers:
          Subject: 'Alert: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Description: {{ .Annotations.description }}
          {{ end }}
EOF
fi

if [ -n "$SLACK_WEBHOOK" ]; then
    cat >> alertmanager/alertmanager.yml << EOF
    slack_configs:
      - api_url: '$SLACK_WEBHOOK'
        channel: '#alerts'
        title: 'Alert: {{ .GroupLabels.alertname }}'
        text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'
EOF
fi
echo -e "${GREEN}[7/8] Creating Docker Compose configuration...${NC}"
# Quoted heredoc: `$$` reaches the file unchanged (Compose's escape for `$`).
# Images are pinned to the same versions as the manual installation steps
# instead of `latest`, so repeated installs produce the same stack.
cat > docker-compose.yml << 'EOF'
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:v2.48.0
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # ./prometheus is bind-mounted over /etc/prometheus, which hides the
      # console directories the image links there; use their real location.
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      - '--web.enable-lifecycle'

  grafana:
    image: grafana/grafana:10.2.2
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    volumes:
      - grafana-data:/var/lib/grafana
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin123

  alertmanager:
    image: prom/alertmanager:v0.26.0
    container_name: alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager:/etc/alertmanager

  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: cadvisor
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro

volumes:
  prometheus-data:
  grafana-data:
EOF

# The containers read these files through bind mounts, so make them world-readable.
chmod 644 prometheus/*.yml alertmanager/*.yml docker-compose.yml
echo -e "${GREEN}[8/8] Starting monitoring stack...${NC}"
# Hand the command to the shell started by `newgrp docker` on its stdin, so it
# runs with the docker group active.
newgrp docker <<< "docker compose up -d"

echo -e "${GREEN}Installation completed successfully!${NC}"
echo -e "${YELLOW}Services are starting up...${NC}"
# Static summary; the quoted delimiter prints the text exactly as written.
cat << 'SUMMARY'

Access URLs:
- Grafana: http://localhost:3000 (admin/admin123)
- Prometheus: http://localhost:9090
- Alertmanager: http://localhost:9093
- Node Exporter: http://localhost:9100
- cAdvisor: http://localhost:8080

SUMMARY
echo -e "${YELLOW}Note: You may need to log out and back in for Docker group changes to take effect${NC}"
Review the script before running. Execute with: bash install.sh