Set up comprehensive Docker container monitoring with Prometheus, Grafana, and cAdvisor to track resource usage, performance metrics, and container health in production environments.
Prerequisites
- Root or sudo access
- 4GB RAM minimum
- Docker and Docker Compose
- Open firewall ports 3000, 8080, 9090, 9100
What this solves
Docker containers in production need continuous monitoring to track resource usage, performance bottlenecks, and system health. This tutorial sets up a complete monitoring stack with Prometheus for metrics collection, cAdvisor for detailed container statistics, and Grafana for visualization and alerting. You'll get real-time insights into CPU, memory, network, and disk usage across all your containers.
Step-by-step installation
Update system packages
Start by updating your system to ensure you have the latest security patches and packages.
# Refresh the package index, then install every pending upgrade without prompting (-y).
sudo apt update && sudo apt upgrade -y
Install Docker Engine
Install Docker to run your containers and the monitoring stack. This adds the official Docker repository and installs the latest stable version.
# Download the Docker install script to a local file; -f makes curl fail on HTTP errors.
curl -fsSL https://get.docker.com -o get-docker.sh
# Run the downloaded script with root privileges.
sudo sh get-docker.sh
# Append (-a) the current user to the docker group.
sudo usermod -aG docker $USER
# Start a new shell with docker as the active group so the membership applies without logging out.
newgrp docker
Install Docker Compose
Docker Compose simplifies multi-container deployments by defining services in a single configuration file.
# Install the Compose plugin package, which provides the `docker compose` subcommand used below.
sudo apt install -y docker-compose-plugin
Create monitoring directory structure
Organize your monitoring configuration files in a dedicated directory with proper permissions.
# Create the Prometheus and Grafana configuration tree in one call, then
# switch into its root.
mkdir -p ~/docker-monitoring/{prometheus,grafana/{dashboards,provisioning/{dashboards,datasources}}}
cd ~/docker-monitoring
Configure Prometheus
Create the Prometheus configuration to scrape metrics from cAdvisor, Node Exporter, and Prometheus itself. Save it as ~/docker-monitoring/prometheus/prometheus.yml.
---
# Prometheus server configuration: global timing, the alert rule file,
# Alertmanager wiring, and one scrape job per exporter in the stack.
global:
  evaluation_interval: 15s
  scrape_interval: 15s

rule_files:
  - alert_rules.yml

# No Alertmanager targets are configured; the list is intentionally empty.
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: prometheus
    static_configs:
      - targets:
          - "localhost:9090"

  # cAdvisor is scraped every 5s, overriding the 15s global interval.
  - job_name: cadvisor
    metrics_path: /metrics
    scrape_interval: 5s
    static_configs:
      - targets:
          - "cadvisor:8080"

  - job_name: node-exporter
    static_configs:
      - targets:
          - "node-exporter:9100"
Create Prometheus alerting rules
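Define alerting rules for high container CPU, memory, and disk usage and for scrape targets that stop responding. Save them as ~/docker-monitoring/prometheus/alert_rules.yml, the file name referenced by rule_files in prometheus.yml.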
---
# Alerting rules for the container metrics scraped from cAdvisor plus a
# generic "scrape target unreachable" rule.
groups:
  - name: docker-containers
    rules:
      # CPU seconds consumed per second, as a percentage of one core.
      # name!="" is the same filter the dashboard queries use, so series
      # without a container name cannot fire an alert with an empty name;
      # sum by (name) yields one series per container.
      - alert: ContainerHighCPUUsage
        expr: 'sum by (name) (rate(container_cpu_usage_seconds_total{name!=""}[5m])) * 100 > 80'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is above 80% for more than 5 minutes."

      # The "> 0" filter drops containers whose memory limit is 0 (no limit
      # set); dividing by 0 would give +Inf and the alert would always fire.
      - alert: ContainerHighMemoryUsage
        expr: '(container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0)) * 100 > 90'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"
          description: "Container {{ $labels.name }} memory usage is above 90% for more than 5 minutes."

      # `up` is recorded per scrape target (prometheus, cadvisor,
      # node-exporter), so this fires when a target cannot be scraped, not
      # when an arbitrary container stops. The alert name is kept for
      # compatibility; the annotations describe what is actually detected.
      - alert: ContainerDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Scrape target {{ $labels.instance }} (job {{ $labels.job }}) is down"
          description: "Prometheus has been unable to scrape {{ $labels.instance }} (job {{ $labels.job }}) for more than 1 minute."

      # Guard against a zero filesystem limit for the same reason as the
      # memory rule above.
      - alert: HighDiskUsage
        expr: '(container_fs_usage_bytes / (container_fs_limit_bytes > 0)) * 100 > 85'
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high disk usage"
          description: "Container {{ $labels.name }} disk usage is above 85% for more than 10 minutes."
Configure Grafana datasource
Automatically configure Prometheus as a datasource when Grafana starts. Save this file as ~/docker-monitoring/grafana/provisioning/datasources/prometheus.yml.
---
# Grafana datasource provisioning: registers the Prometheus service from the
# Compose network as the default, editable datasource.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    url: "http://prometheus:9090"
    access: proxy
    isDefault: true
    editable: true
Configure Grafana dashboard provisioning
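Set up automatic dashboard loading from the dashboards directory. Save this provider file in ~/docker-monitoring/grafana/dashboards/ (for example as dashboards.yml): the Compose file below mounts that directory over /etc/grafana/provisioning/dashboards, so a provider file placed in grafana/provisioning/dashboards would be hidden inside the container.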
---
# Grafana dashboard provider: loads dashboard JSON files from the directory
# given in options.path and re-scans it every 10 seconds.
apiVersion: 1

providers:
  - name: "Docker Container Monitoring"
    type: file
    orgId: 1
    folder: ""
    allowUiUpdates: true
    disableDeletion: false
    updateIntervalSeconds: 10
    options:
      path: /etc/grafana/provisioning/dashboards
Create Docker container dashboard
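Create a comprehensive dashboard for monitoring Docker containers with key metrics and visualizations. Save it as a .json file in ~/docker-monitoring/grafana/dashboards/ (for example docker-containers.json).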
{
  "id": null,
  "title": "Docker Container Monitoring",
  "tags": ["docker", "containers"],
  "timezone": "browser",
  "panels": [
    {
      "id": 1,
      "title": "Container CPU Usage",
      "type": "timeseries",
      "targets": [
        {
          "expr": "rate(container_cpu_usage_seconds_total{name!~\".*POD.*\",name!=\"\"}[5m]) * 100",
          "legendFormat": "{{ name }}",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "min": 0,
          "max": 100,
          "custom": {
            "axisLabel": "CPU Usage %"
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      }
    },
    {
      "id": 2,
      "title": "Container Memory Usage",
      "type": "timeseries",
      "targets": [
        {
          "expr": "container_memory_usage_bytes{name!~\".*POD.*\",name!=\"\"}",
          "legendFormat": "{{ name }}",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "bytes",
          "custom": {
            "axisLabel": "Memory Usage (Bytes)"
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      }
    },
    {
      "id": 3,
      "title": "Container Network I/O",
      "type": "timeseries",
      "targets": [
        {
          "expr": "rate(container_network_receive_bytes_total{name!~\".*POD.*\",name!=\"\"}[5m])",
          "legendFormat": "{{ name }} - RX",
          "refId": "A"
        },
        {
          "expr": "rate(container_network_transmit_bytes_total{name!~\".*POD.*\",name!=\"\"}[5m])",
          "legendFormat": "{{ name }} - TX",
          "refId": "B"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "Bps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 8
      }
    }
  ],
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "refresh": "10s"
}
Create Docker Compose configuration
Define all monitoring services in a single Docker Compose file for easy deployment and management. Save it as ~/docker-monitoring/docker-compose.yml.
---
# Monitoring stack: Prometheus, Grafana, cAdvisor and Node Exporter plus a
# demo nginx container, all attached to one bridge network so they can reach
# each other by service name.
version: "3.8"

services:
  prometheus:
    image: prom/prometheus:v2.45.0
    container_name: prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--web.enable-lifecycle'
      - '--storage.tsdb.retention.time=15d'
    ports:
      - "9090:9090"
    volumes:
      # Host ./prometheus holds prometheus.yml and alert_rules.yml.
      - ./prometheus:/etc/prometheus
      - prometheus_data:/prometheus
    networks:
      - monitoring
    restart: unless-stopped

  grafana:
    image: grafana/grafana:10.0.3
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # Export GF_SECURITY_ADMIN_PASSWORD (or set it in a .env file next to
      # this file) to override the password; admin123 is only the fallback
      # default and should not be used in production.
      - "GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin123}"
      - "GF_USERS_ALLOW_SIGN_UP=false"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
      # This mount sits on top of the provisioning/dashboards subdirectory of
      # the mount above, so files in host ./grafana/provisioning/dashboards
      # are not visible in the container; only ./grafana/dashboards is.
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
    networks:
      - monitoring
    restart: unless-stopped

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.0
    container_name: cadvisor
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      # Explicit host:container form, matching the installer script.
      - /dev/kmsg:/dev/kmsg
    networks:
      - monitoring
    restart: unless-stopped

  node-exporter:
    image: prom/node-exporter:v1.6.1
    container_name: node-exporter
    ports:
      - "9100:9100"
    command:
      - '--path.rootfs=/host'
    volumes:
      - '/:/host:ro,rslave'
    pid: host
    networks:
      - monitoring
    restart: unless-stopped

  # Example application container to monitor
  nginx-demo:
    image: nginx:alpine
    container_name: nginx-demo
    ports:
      - "8081:80"
    networks:
      - monitoring
    restart: unless-stopped

volumes:
  prometheus_data:
  grafana_data:

networks:
  monitoring:
    driver: bridge
Set proper file permissions
Ensure the monitoring stack can read configuration files and write data with correct ownership.
# Hand the whole tree to the current user and that user's primary group
# (which is not necessarily named after the user).
sudo chown -R "$(id -un):$(id -gn)" ~/docker-monitoring
# Directories need the execute bit to be traversable; configuration files
# only need to be readable. Using find avoids marking files executable and
# does not depend on the exact file names chosen in the earlier steps.
find ~/docker-monitoring -type d -exec chmod 755 {} +
find ~/docker-monitoring -type f -exec chmod 644 {} +
Configure firewall rules
Open necessary ports for the monitoring services while maintaining security.
# Open one TCP port per monitoring service; each entry is "port:comment".
for rule in '3000:Grafana' '9090:Prometheus' '8080:cAdvisor' '9100:Node Exporter'; do
    sudo ufw allow "${rule%%:*}/tcp" comment "${rule#*:}"
done
Deploy the monitoring stack
Start all monitoring services using Docker Compose.
# Compose looks for docker-compose.yml in the current directory, so run it from the project root.
cd ~/docker-monitoring
# Create and start every service in the background (-d = detached).
docker compose up -d
Configure Grafana dashboard alerts
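Set up notification channels and alert rules within Grafana for proactive monitoring. Log in at http://localhost:3000 as admin with the password set in docker-compose.yml, then use the Alerting section of the Grafana UI to add a contact point (for example email or a webhook) and to create alert rules based on the Prometheus datasource.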
Verify your setup
Check that all monitoring services are running and collecting data properly.
# List the project's containers and their current state.
docker compose ps
# Show the log output of each core service to spot startup errors.
docker compose logs prometheus
docker compose logs grafana
docker compose logs cadvisor
Verify service endpoints are responding:
# Print the health of every active Prometheus scrape target (requires jq to be installed).
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[].health'
# Show the first ten lines of the metrics cAdvisor exposes.
curl -s http://localhost:8080/metrics | head -10
# Query Grafana's health endpoint.
curl -s http://localhost:3000/api/health
Check container metrics are being collected:
# Query the `up` metric and print the job label of every returned series (requires jq).
curl -s 'http://localhost:9090/api/v1/query?query=up' | jq '.data.result[].metric.job'
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| cAdvisor container exits with permission error | Insufficient privileges for system access | Ensure privileged: true in docker-compose.yml and restart |
| Prometheus shows targets down | Network connectivity or service discovery issues | Check docker compose logs prometheus and verify service names in config |
| Grafana dashboards show no data | Datasource misconfiguration | Verify Prometheus URL is http://prometheus:9090 in datasource settings |
| High memory usage by cAdvisor | Default housekeeping (metric collection) intervals are very frequent | Add --housekeeping_interval=30s --max_housekeeping_interval=35s to cAdvisor command |
| Alerts not firing | Alert rules syntax error | Validate YAML syntax and check Prometheus logs: docker compose logs prometheus |
Next steps
- Set up Prometheus and Grafana monitoring stack with Docker compose
- Configure Prometheus Alertmanager with email notifications for production alerts
- Implement Grafana advanced alerting with webhooks and notification channels
- Monitor Kubernetes clusters with Prometheus and Grafana for container orchestration insights
- Configure Loki and Promtail for centralized Docker log aggregation and analysis
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Installer for the Docker monitoring stack.
# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails.
set -o errexit -o nounset -o pipefail
# ANSI colour escape sequences for output; they are interpreted later by
# `echo -e`. NC resets the colour.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
NC="\033[0m"
# Global variables
INSTALL_DIR="$HOME/docker-monitoring"
COMPOSE_FILE="$INSTALL_DIR/docker-compose.yml"
# Record whether the install directory existed before this run, so a failed
# run never deletes configuration the user created earlier (for example by
# following the manual steps).
if [ -d "$INSTALL_DIR" ]; then
    INSTALL_DIR_PREEXISTED=1
else
    INSTALL_DIR_PREEXISTED=0
fi

# Error handling
# cleanup: ERR-trap handler. Reports the failure, removes the install
# directory only when this run created it, and exits with status 1.
cleanup() {
    # Disarm the trap so a failure inside the handler cannot re-enter it.
    trap - ERR
    echo -e "${RED}Installation failed. Cleaning up...${NC}"
    if [ -d "$INSTALL_DIR" ]; then
        if [ "$INSTALL_DIR_PREEXISTED" -eq 0 ]; then
            rm -rf "$INSTALL_DIR"
        else
            echo -e "${YELLOW}Keeping pre-existing directory $INSTALL_DIR${NC}"
        fi
    fi
    exit 1
}
# errtrace (-E): without it bash does not run the ERR trap for commands that
# fail inside shell functions, and every installation step is a function.
set -E
trap cleanup ERR
# usage: print a three-line help text to stdout and exit with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0" \
        "Installs Docker monitoring stack with Prometheus, Grafana, and cAdvisor" \
        "No arguments required - auto-detects system configuration"
    exit 1
}
# log_step STEP MESSAGE: print "[STEP] MESSAGE" in green.
log_step() {
    local step="$1" message="$2"
    # %b interprets backslash escapes the same way `echo -e` does.
    printf '%b\n' "${GREEN}[${step}] ${message}${NC}"
}
# log_warning MESSAGE: print "Warning: MESSAGE" in yellow.
log_warning() {
    local message="$1"
    printf '%b\n' "${YELLOW}Warning: ${message}${NC}"
}
# log_error MESSAGE: print "Error: MESSAGE" in red.
log_error() {
    local message="$1"
    printf '%b\n' "${RED}Error: ${message}${NC}"
}
# Detect distribution and package manager
# detect_distro: source /etc/os-release and set the globals PKG_MGR,
# PKG_UPDATE and PKG_INSTALL for the detected distribution family.
# ID is tried first; if it is not a known distribution, each entry of
# ID_LIKE is tried so derivatives of a supported family are accepted too.
# Exits with status 1 when the file is missing or nothing matches.
detect_distro() {
    if [ ! -f /etc/os-release ]; then
        log_error "Cannot detect distribution. /etc/os-release not found."
        exit 1
    fi
    . /etc/os-release
    local candidate
    # ${ID:-} and ${ID_LIKE:-} keep `set -u` from aborting when a field is
    # absent. ID_LIKE is a space-separated list, so it is deliberately left
    # unquoted to split into one candidate per word.
    for candidate in "${ID:-}" ${ID_LIKE:-}; do
        case "$candidate" in
            ubuntu|debian)
                PKG_MGR="apt"
                PKG_UPDATE="apt update && apt upgrade -y"
                PKG_INSTALL="apt install -y"
                return 0
                ;;
            almalinux|rocky|centos|rhel|ol|fedora)
                PKG_MGR="dnf"
                PKG_UPDATE="dnf update -y"
                PKG_INSTALL="dnf install -y"
                return 0
                ;;
            amzn)
                PKG_MGR="yum"
                PKG_UPDATE="yum update -y"
                PKG_INSTALL="yum install -y"
                return 0
                ;;
        esac
    done
    log_error "Unsupported distribution: ${ID:-unknown}"
    exit 1
}
# check_prerequisites: refuse to run as root and require sudo and curl on
# PATH. Exits with status 1 on the first unmet requirement.
check_prerequisites() {
    if [ "$EUID" -eq 0 ]; then
        log_error "Do not run this script as root. It will use sudo when needed."
        exit 1
    fi
    local tool
    for tool in sudo curl; do
        if ! command -v "$tool" &> /dev/null; then
            log_error "$tool is required but not installed"
            exit 1
        fi
    done
}
# update_system: run the distribution's update command (PKG_UPDATE) as root.
update_system() {
    log_step "1/7" "Updating system packages"
    # PKG_UPDATE can be a compound command ("apt update && apt upgrade -y").
    # Expanding it unquoted after sudo would hand "&&" to the package
    # manager as a literal argument, so let a root shell parse the string.
    sudo sh -c "$PKG_UPDATE"
}
# install_docker: install Docker Engine with the get.docker.com script unless
# a docker command is already on PATH, add the current user to the docker
# group, and enable and start the service.
install_docker() {
    log_step "2/7" "Installing Docker Engine"
    if command -v docker &> /dev/null; then
        log_warning "Docker already installed, skipping"
        return
    fi
    # Download to a private temporary file instead of a fixed name in the
    # current directory, and remove it whether or not the install succeeds.
    local installer
    installer="$(mktemp)"
    curl -fsSL https://get.docker.com -o "$installer" || { rm -f "$installer"; return 1; }
    sudo sh "$installer" || { rm -f "$installer"; return 1; }
    rm -f "$installer"
    sudo usermod -aG docker "$USER"
    sudo systemctl enable docker
    sudo systemctl start docker
}
# install_docker_compose: make sure the `docker compose` subcommand exists,
# installing the docker-compose-plugin package only when it is missing.
install_docker_compose() {
    log_step "3/7" "Installing Docker Compose plugin"
    # Skip the package install when Compose already works; the package may
    # not exist in the configured repositories if Docker was installed from
    # another source.
    if docker compose version > /dev/null 2>&1; then
        log_warning "Docker Compose plugin already installed, skipping"
        return
    fi
    # PKG_INSTALL is a simple command plus flags, so word splitting is intended.
    sudo $PKG_INSTALL docker-compose-plugin
}
# create_directory_structure: create the Prometheus and Grafana directories
# under INSTALL_DIR and set them to mode 755.
create_directory_structure() {
    log_step "4/7" "Creating monitoring directory structure"
    local subdir
    for subdir in \
        prometheus \
        grafana/dashboards \
        grafana/provisioning/dashboards \
        grafana/provisioning/datasources; do
        mkdir -p "$INSTALL_DIR/$subdir"
    done
    chmod 755 "$INSTALL_DIR"
    chmod -R 755 "$INSTALL_DIR/prometheus" "$INSTALL_DIR/grafana"
}
# create_configuration_files: write the Prometheus configuration, the alert
# rules and the Grafana provisioning files below INSTALL_DIR, then make them
# world-readable (644). The directories must already exist.
create_configuration_files() {
    log_step "5/7" "Creating configuration files"

    # Prometheus configuration
    cat > "$INSTALL_DIR/prometheus/prometheus.yml" << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "alert_rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets: []

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'cadvisor'
    static_configs:
      - targets: ['cadvisor:8080']
    scrape_interval: 5s
    metrics_path: /metrics

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
EOF

    # Prometheus alert rules. The heredoc delimiter is quoted so the
    # {{ $labels... }} templates are written literally.
    cat > "$INSTALL_DIR/prometheus/alert_rules.yml" << 'EOF'
groups:
  - name: docker-containers
    rules:
      # name!="" keeps series without a container name from firing alerts.
      - alert: ContainerHighCPUUsage
        expr: 'sum by (name) (rate(container_cpu_usage_seconds_total{name!=""}[5m])) * 100 > 80'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is above 80% for more than 5 minutes."

      # "> 0" drops containers without a memory limit; dividing by a limit of
      # 0 would give +Inf and the alert would always fire.
      - alert: ContainerHighMemoryUsage
        expr: '(container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0)) * 100 > 90'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"
          description: "Container {{ $labels.name }} memory usage is above 90% for more than 5 minutes."

      # `up` is per scrape target, so this reports unreachable targets.
      - alert: ContainerDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Scrape target {{ $labels.instance }} (job {{ $labels.job }}) is down"
          description: "Prometheus has been unable to scrape {{ $labels.instance }} (job {{ $labels.job }}) for more than 1 minute."

      - alert: HighDiskUsage
        expr: '(container_fs_usage_bytes / (container_fs_limit_bytes > 0)) * 100 > 85'
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high disk usage"
          description: "Container {{ $labels.name }} disk usage is above 85% for more than 10 minutes."
EOF

    # Grafana datasource
    cat > "$INSTALL_DIR/grafana/provisioning/datasources/datasource.yml" << 'EOF'
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true
EOF

    # Grafana dashboard provisioning
    cat > "$INSTALL_DIR/grafana/provisioning/dashboards/dashboard.yml" << 'EOF'
apiVersion: 1

providers:
  - name: 'default'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    options:
      path: /var/lib/grafana/dashboards
EOF

    chmod 644 "$INSTALL_DIR"/prometheus/*.yml
    chmod 644 "$INSTALL_DIR"/grafana/provisioning/*/*.yml
}
# create_docker_compose: write the Compose file for Prometheus, Grafana,
# cAdvisor and Node Exporter to COMPOSE_FILE and make it world-readable.
# Image tags are pinned to the versions used in the manual steps so repeated
# installs deploy the same software.
create_docker_compose() {
    log_step "6/7" "Creating Docker Compose configuration"
    # Quoted delimiter: the content is written literally, so "$$" reaches
    # Compose unchanged (Compose turns it into a single "$").
    cat > "$COMPOSE_FILE" << 'EOF'
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:v2.45.0
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'

  grafana:
    image: grafana/grafana:10.0.3
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
      # The dashboard provider reads /var/lib/grafana/dashboards, so the
      # host dashboards directory has to be mounted there.
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.0
    container_name: cadvisor
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg

  node-exporter:
    image: prom/node-exporter:v1.6.1
    container_name: node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

volumes:
  prometheus_data:
  grafana_data:
EOF
    chmod 644 "$COMPOSE_FILE"
}
# start_monitoring_stack: change into INSTALL_DIR and start all services in
# the background, then pause briefly so they can come up.
start_monitoring_stack() {
    log_step "7/7" "Starting monitoring stack"
    cd "$INSTALL_DIR"
    # A user who was only just added to the docker group cannot reach the
    # daemon from the current shell; fall back to sudo in that case.
    if docker info > /dev/null 2>&1; then
        docker compose up -d
    else
        log_warning "Current shell cannot access the Docker daemon; using sudo"
        sudo docker compose up -d
    fi
    # Wait for services to start
    sleep 10
}
# verify_installation: check that each of the four stack containers is
# running and that Prometheus and Grafana answer HTTP requests successfully.
# Prints the access URLs on success; exits with status 1 on any failure.
verify_installation() {
    echo -e "${GREEN}Verifying installation...${NC}"
    local failed=0
    local container state
    # Use sudo when the current shell cannot reach the Docker daemon (the
    # docker group membership added during install is not active yet).
    local -a docker_cmd=(docker)
    if ! docker info > /dev/null 2>&1; then
        docker_cmd=(sudo docker)
    fi
    # Check every container by its fixed container_name rather than looking
    # for a single "Up" line, which passes when only one container runs.
    for container in prometheus grafana cadvisor node-exporter; do
        # "|| true" keeps a missing container from counting as a script error.
        state="$("${docker_cmd[@]}" inspect -f '{{.State.Running}}' "$container" 2> /dev/null || true)"
        if [ "$state" != "true" ]; then
            log_error "Container $container is not running"
            failed=1
        fi
    done
    # -f makes curl fail on HTTP error statuses; the URL is quoted because
    # it contains "?", which the shell would otherwise treat as a glob.
    if ! curl -fs 'http://localhost:9090/api/v1/query?query=up' > /dev/null; then
        log_error "Prometheus is not accessible"
        failed=1
    fi
    if ! curl -fs http://localhost:3000/api/health > /dev/null; then
        log_error "Grafana is not accessible"
        failed=1
    fi
    if [ $failed -eq 0 ]; then
        echo -e "${GREEN}✓ Installation completed successfully!${NC}"
        echo -e "${GREEN}Access URLs:${NC}"
        echo " Prometheus: http://localhost:9090"
        echo " Grafana: http://localhost:3000 (admin/admin)"
        echo " cAdvisor: http://localhost:8080"
        echo " Node Exporter: http://localhost:9100"
        echo
        echo -e "${YELLOW}Note: You may need to logout and login again for Docker group membership to take effect${NC}"
    else
        exit 1
    fi
}
# main: entry point. Rejects any command-line argument by printing usage,
# then runs every installation step in order.
main() {
    [ $# -eq 0 ] || usage
    local step
    for step in \
        check_prerequisites \
        detect_distro \
        update_system \
        install_docker \
        install_docker_compose \
        create_directory_structure \
        create_configuration_files \
        create_docker_compose \
        start_monitoring_stack \
        verify_installation; do
        "$step"
    done
}
main "$@"
Review the script before running. Execute with: bash install.sh