Monitor Docker Containers with Prometheus & Grafana

Set up comprehensive Docker container monitoring with Prometheus, Grafana, and cAdvisor to track resource usage, performance metrics, and container health in production environments.

Prerequisites

Root or sudo access
4GB RAM minimum
Docker and Docker Compose
Open firewall ports 3000, 8080, 9090, 9100

What this solves

Docker containers in production need continuous monitoring to track resource usage, performance bottlenecks, and system health. This tutorial sets up a complete monitoring stack with Prometheus for metrics collection, cAdvisor for detailed container statistics, and Grafana for visualization and alerting. You'll get real-time insights into CPU, memory, network, and disk usage across all your containers.

Step-by-step installation

Update system packages

Start by updating your system to ensure you have the latest security patches and packages.

sudo apt update && sudo apt upgrade -y

sudo dnf update -y

Install Docker Engine

Install Docker to run your containers and the monitoring stack. This adds the official Docker repository and installs the latest stable version.

curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
sudo usermod -aG docker $USER
newgrp docker

curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
sudo usermod -aG docker $USER
newgrp docker

Install Docker Compose

Docker Compose simplifies multi-container deployments by defining services in a single configuration file.

sudo apt install -y docker-compose-plugin

sudo dnf install -y docker-compose-plugin

Create monitoring directory structure

Organize your monitoring configuration files in a dedicated directory with proper permissions.

mkdir -p ~/docker-monitoring/{prometheus,grafana/dashboards,grafana/provisioning/{dashboards,datasources}}
cd ~/docker-monitoring

Configure Prometheus

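Create the Prometheus configuration file `~/docker-monitoring/prometheus/prometheus.yml`. It tells Prometheus to scrape metrics from itself, cAdvisor, and Node Exporter, and to load the alerting rules defined in the next step.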

# Defaults applied to every scrape job unless the job overrides them.
global:
  scrape_interval: 15s      # how often targets are scraped
  evaluation_interval: 15s  # how often alerting rules are evaluated

# Alerting rule files, resolved relative to this configuration file.
rule_files:
  - "alert_rules.yml"

# No Alertmanager is configured: the target list is intentionally empty.
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Per-container metrics; scraped more often than the global default.
  - job_name: 'cadvisor'
    static_configs:
      - targets: ['cadvisor:8080']
    scrape_interval: 5s
    metrics_path: /metrics

  # Host-level metrics.
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

Create Prometheus alerting rules

Define alerting rules for container health, high resource usage, and system issues.

# Alerting rules evaluated by Prometheus against cAdvisor metrics.
groups:
  - name: docker-containers
    rules:
      # name!="" restricts the rule to named containers, so the root and
      # system cgroups (which carry no container name) cannot trigger it.
      - alert: ContainerHighCPUUsage
        expr: rate(container_cpu_usage_seconds_total{name!=""}[5m]) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is above 80% for more than 5 minutes."

      # Containers without a memory limit report a limit of 0; dividing by it
      # yields +Inf and would fire this alert permanently. The "> 0" filter
      # drops those series before the division.
      - alert: ContainerHighMemoryUsage
        expr: (container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0)) * 100 > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"
          description: "Container {{ $labels.name }} memory usage is above 90% for more than 5 minutes."

      # "up" is recorded per scrape target, so this fires when a monitored
      # endpoint (Prometheus, cAdvisor, Node Exporter) stops answering scrapes.
      - alert: ContainerDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.instance }} is down"
          description: "Container {{ $labels.instance }} has been down for more than 1 minute."

      - alert: HighDiskUsage
        expr: (container_fs_usage_bytes / container_fs_limit_bytes) * 100 > 85
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high disk usage"
          description: "Container {{ $labels.name }} disk usage is above 85% for more than 10 minutes."

Configure Grafana datasource

Automatically configure Prometheus as a datasource when Grafana starts.

# Grafana datasource provisioning file.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    # Service name and port of the Prometheus container on the Compose network.
    url: http://prometheus:9090
    isDefault: true
    editable: true

Configure Grafana dashboard provisioning

Set up automatic dashboard loading from the dashboards directory.

# Grafana dashboard provider: loads dashboard JSON files from a directory.
apiVersion: 1

providers:
  - name: 'Docker Container Monitoring'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      # Directory scanned for dashboard JSON files.
      # NOTE(review): the Compose file mounts ./grafana/dashboards onto this
      # same container path, which hides ./grafana/provisioning/dashboards;
      # presumably this provider file must then live in ./grafana/dashboards
      # next to the dashboard JSON. Confirm the intended file locations.
      path: /etc/grafana/provisioning/dashboards

Create Docker container dashboard

Create a comprehensive dashboard for monitoring Docker containers with key metrics and visualizations.

{
  "id": null,
  "title": "Docker Container Monitoring",
  "tags": ["docker", "containers"],
  "timezone": "browser",
  "panels": [
    {
      "id": 1,
      "title": "Container CPU Usage",
      "type": "graph",
      "targets": [
        {
          "expr": "rate(container_cpu_usage_seconds_total{name!~\".*POD.*\",name!=\"\"}[5m]) * 100",
          "legendFormat": "{{ name }}",
          "refId": "A"
        }
      ],
      "yAxes": [
        {
          "label": "CPU Usage %",
          "max": 100,
          "min": 0
        }
      ],
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      }
    },
    {
      "id": 2,
      "title": "Container Memory Usage",
      "type": "graph",
      "targets": [
        {
          "expr": "container_memory_usage_bytes{name!~\".*POD.*\",name!=\"\"}",
          "legendFormat": "{{ name }}",
          "refId": "A"
        }
      ],
      "yAxes": [
        {
          "label": "Memory Usage (Bytes)"
        }
      ],
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      }
    },
    {
      "id": 3,
      "title": "Container Network I/O",
      "type": "graph",
      "targets": [
        {
          "expr": "rate(container_network_receive_bytes_total{name!~\".*POD.*\",name!=\"\"}[5m])",
          "legendFormat": "{{ name }} - RX",
          "refId": "A"
        },
        {
          "expr": "rate(container_network_transmit_bytes_total{name!~\".*POD.*\",name!=\"\"}[5m])",
          "legendFormat": "{{ name }} - TX",
          "refId": "B"
        }
      ],
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 8
      }
    }
  ],
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "refresh": "10s"
}

Create Docker Compose configuration

Define all monitoring services in a single Docker Compose file for easy deployment and management.

# Monitoring stack: Prometheus, Grafana, cAdvisor, Node Exporter and a demo
# nginx container, all attached to the "monitoring" bridge network.
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:v2.45.0
    container_name: prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--web.enable-lifecycle'
      - '--storage.tsdb.retention.time=15d'
    ports:
      - "9090:9090"
    volumes:
      # Configuration and alert rules from the host; TSDB data in a named volume.
      - ./prometheus:/etc/prometheus
      - prometheus_data:/prometheus
    networks:
      - monitoring
    restart: unless-stopped

  grafana:
    image: grafana/grafana:10.0.3
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # Export GRAFANA_ADMIN_PASSWORD (or set it in a .env file next to this
      # file) to avoid the well-known default; without it the password
      # falls back to admin123.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}
      - GF_USERS_ALLOW_SIGN_UP=false
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
      # NOTE(review): this mount overlays the dashboards directory of the
      # provisioning mount above, hiding ./grafana/provisioning/dashboards
      # inside the container. Confirm the provider file is placed accordingly.
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
    networks:
      - monitoring
    restart: unless-stopped

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.0
    container_name: cadvisor
    ports:
      - "8080:8080"
    volumes:
      # Read-only views of the host that cAdvisor inspects.
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg
    networks:
      - monitoring
    restart: unless-stopped

  node-exporter:
    image: prom/node-exporter:v1.6.1
    container_name: node-exporter
    ports:
      - "9100:9100"
    command:
      - '--path.rootfs=/host'
    volumes:
      - '/:/host:ro,rslave'
    pid: host
    networks:
      - monitoring
    restart: unless-stopped

  # Example application container to monitor
  nginx-demo:
    image: nginx:alpine
    container_name: nginx-demo
    ports:
      - "8081:80"
    networks:
      - monitoring
    restart: unless-stopped

volumes:
  prometheus_data:
  grafana_data:

networks:
  monitoring:
    driver: bridge

Set proper file permissions

Ensure the monitoring stack can read configuration files and write data with correct ownership.

Never use chmod 777. It gives every user on the system full access to your files. Instead, fix ownership with chown and use minimal permissions.

# Give the current user ownership of the whole configuration tree.
sudo chown -R "$USER":"$USER" ~/docker-monitoring
# Directories need the execute bit to be traversable (755); regular files only
# need to be readable (644). A blanket "chmod -R 755" would mark every
# configuration file as executable.
find ~/docker-monitoring -type d -exec chmod 755 {} +
find ~/docker-monitoring -type f -exec chmod 644 {} +

Configure firewall rules

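Open the ports used by the monitoring services. No authentication is configured for Prometheus, cAdvisor, or Node Exporter in this setup, so where possible restrict these rules to trusted source addresses instead of opening the ports to every host.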

sudo ufw allow 3000/tcp comment 'Grafana'
sudo ufw allow 9090/tcp comment 'Prometheus'
sudo ufw allow 8080/tcp comment 'cAdvisor'
sudo ufw allow 9100/tcp comment 'Node Exporter'

sudo firewall-cmd --permanent --add-port=3000/tcp
sudo firewall-cmd --permanent --add-port=9090/tcp
sudo firewall-cmd --permanent --add-port=8080/tcp
sudo firewall-cmd --permanent --add-port=9100/tcp
sudo firewall-cmd --reload

Deploy the monitoring stack

Start all monitoring services using Docker Compose.

cd ~/docker-monitoring
docker compose up -d

Configure Grafana dashboard alerts

Set up notification channels and alert rules within Grafana for proactive monitoring.

Note: Access Grafana at http://your-server-ip:3000 with admin/admin123 credentials. Change the password immediately after first login.

Verify your setup

Check that all monitoring services are running and collecting data properly.

docker compose ps
docker compose logs prometheus
docker compose logs grafana
docker compose logs cadvisor

Verify service endpoints are responding:

curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[].health'
curl -s http://localhost:8080/metrics | head -10
curl -s http://localhost:3000/api/health

Check container metrics are being collected:

curl -s 'http://localhost:9090/api/v1/query?query=up' | jq '.data.result[].metric.job'

Common issues

Symptom	Cause	Fix
cAdvisor container exits with permission error	Insufficient privileges for system access	Ensure `privileged: true` in docker-compose.yml and restart
Prometheus shows targets down	Network connectivity or service discovery issues	Check `docker compose logs prometheus` and verify service names in config
Grafana dashboards show no data	Datasource misconfiguration	Verify Prometheus URL is `http://prometheus:9090` in datasource settings
High memory usage by cAdvisor	Default retention settings	Add `--housekeeping_interval=30s --max_housekeeping_interval=35s` to cAdvisor command
Alerts not firing	Alert rules syntax error	Validate YAML syntax and check Prometheus logs: `docker compose logs prometheus`

Next steps

Automated install script

Run this to automate the entire setup

install.sh

#!/usr/bin/env bash
#
# install.sh - installs a Docker monitoring stack (Prometheus, Grafana,
# cAdvisor, Node Exporter) into $HOME/docker-monitoring.
# Usage: bash install.sh   (takes no arguments)

# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Colors for output
# ANSI escape sequences; the log helpers print them with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Global variables
# INSTALL_DIR receives every generated configuration file; COMPOSE_FILE is the
# Compose definition written into it.
INSTALL_DIR="$HOME/docker-monitoring"
COMPOSE_FILE="$INSTALL_DIR/docker-compose.yml"

# Error handling
#######################################
# Failure handler: reports the failure, removes the installation directory
# and terminates the script.
# Globals:   INSTALL_DIR, RED, NC (read)
# Outputs:   failure notice on stderr
# Returns:   never returns; exits with status 1
#######################################
cleanup() {
    # Diagnostics go to stderr so they never mix with captured stdout.
    echo -e "${RED}Installation failed. Cleaning up...${NC}" >&2
    # NOTE(review): the whole directory is deleted even if it existed before
    # this run - confirm that is acceptable when re-running the installer.
    if [ -d "$INSTALL_DIR" ]; then
        # ":?" aborts rather than expanding to an empty path; "--" stops
        # option parsing should the path ever start with a dash.
        rm -rf -- "${INSTALL_DIR:?}"
    fi
    exit 1
}

# Run cleanup when a command fails.
# NOTE(review): errtrace (`set -E`) is not enabled, and bash does not inherit
# an ERR trap inside shell functions without it - confirm whether cleanup is
# expected to fire for failures inside the install steps.
trap cleanup ERR

# Prints the three-line usage text to stdout and exits with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0" \
        "Installs Docker monitoring stack with Prometheus, Grafana, and cAdvisor" \
        "No arguments required - auto-detects system configuration"
    exit 1
}

# Prints a green progress line of the form "[<step>] <message>".
# Arguments: $1 - step label (e.g. "1/7"), $2 - message text
log_step() {
    local step_label=$1
    local step_text=$2
    echo -e "${GREEN}[${step_label}] ${step_text}${NC}"
}

# Prints a yellow "Warning: <message>" line on stderr, keeping diagnostics
# out of stdout.
# Arguments: $1 - message text
log_warning() {
    echo -e "${YELLOW}Warning: $1${NC}" >&2
}

# Prints a red "Error: <message>" line on stderr, keeping diagnostics out of
# stdout.
# Arguments: $1 - message text
log_error() {
    echo -e "${RED}Error: $1${NC}" >&2
}

# Reads /etc/os-release and selects the package manager for the distribution.
# Globals:   PKG_MGR, PKG_UPDATE, PKG_INSTALL (written); the variables from
#            /etc/os-release are sourced into the current shell
# Returns:   exits with status 1 when the file is missing or the ID is not
#            one of the supported distributions
detect_distro() {
    [ -f /etc/os-release ] || {
        log_error "Cannot detect distribution. /etc/os-release not found."
        exit 1
    }

    . /etc/os-release

    case "$ID" in
        ubuntu | debian)
            PKG_MGR="apt"
            ;;
        almalinux | rocky | centos | rhel | ol | fedora)
            PKG_MGR="dnf"
            ;;
        amzn)
            PKG_MGR="yum"
            ;;
        *)
            log_error "Unsupported distribution: $ID"
            exit 1
            ;;
    esac

    # apt needs a separate index refresh before upgrading; dnf and yum do
    # both in a single "update".
    if [ "$PKG_MGR" = "apt" ]; then
        PKG_UPDATE="apt update && apt upgrade -y"
    else
        PKG_UPDATE="$PKG_MGR update -y"
    fi
    PKG_INSTALL="$PKG_MGR install -y"
}

# Verifies the script runs as a non-root user and that sudo and curl exist.
# Returns: exits with status 1 on the first unmet prerequisite
check_prerequisites() {
    if [[ "$EUID" -eq 0 ]]; then
        log_error "Do not run this script as root. It will use sudo when needed."
        exit 1
    fi

    local tool
    for tool in sudo curl; do
        if ! command -v "$tool" &> /dev/null; then
            log_error "$tool is required but not installed"
            exit 1
        fi
    done
}

# Updates the system packages with the command chosen by detect_distro.
# Globals: PKG_UPDATE (read)
update_system() {
    log_step "1/7" "Updating system packages"
    # PKG_UPDATE can be a command list ("apt update && apt upgrade -y").
    # Expanding it unquoted after sudo would hand "&&" to the package manager
    # as a literal argument, so the string is run by a shell instead.
    sudo sh -c "$PKG_UPDATE"
}

# Installs Docker Engine through the get.docker.com convenience script unless
# a docker binary is already on PATH, adds the current user to the docker
# group, and enables and starts the service.
# Globals: USER (read)
# Returns: non-zero when the download or the installer fails
install_docker() {
    log_step "2/7" "Installing Docker Engine"
    if command -v docker &> /dev/null; then
        log_warning "Docker already installed, skipping"
        return
    fi

    # Download to a private temporary file instead of a fixed name in the
    # current directory, which may be unwritable or already hold such a file.
    local installer
    installer=$(mktemp) || {
        log_error "Could not create a temporary file for the Docker installer"
        return 1
    }

    local status=0
    if curl -fsSL https://get.docker.com -o "$installer"; then
        sudo sh "$installer" || status=$?
    else
        status=$?
    fi
    # Remove the installer on success and on failure alike.
    rm -f -- "$installer"
    if [ "$status" -ne 0 ]; then
        log_error "Docker installation failed"
        return "$status"
    fi

    sudo usermod -aG docker "$USER"

    sudo systemctl enable docker
    sudo systemctl start docker
}

# Installs the Docker Compose plugin unless "docker compose" already works.
# Globals: PKG_INSTALL (read) - install command, e.g. "apt install -y"
install_docker_compose() {
    log_step "3/7" "Installing Docker Compose plugin"

    # Skip when the plugin is already usable: re-runs stay idempotent, and
    # hosts whose repositories lack the docker-compose-plugin package do not
    # fail needlessly.
    if docker compose version &> /dev/null; then
        log_warning "Docker Compose plugin already installed, skipping"
        return
    fi

    # Split the install command into words explicitly rather than relying on
    # unquoted expansion.
    local -a install_cmd
    read -r -a install_cmd <<< "$PKG_INSTALL"
    sudo "${install_cmd[@]}" docker-compose-plugin
}

# Creates the configuration directory tree below INSTALL_DIR and sets the
# directories to mode 755.
# Globals: INSTALL_DIR (read)
create_directory_structure() {
    log_step "4/7" "Creating monitoring directory structure"

    local -a tree=(
        "$INSTALL_DIR/prometheus"
        "$INSTALL_DIR/grafana/dashboards"
        "$INSTALL_DIR/grafana/provisioning/dashboards"
        "$INSTALL_DIR/grafana/provisioning/datasources"
    )
    mkdir -p "${tree[@]}"

    chmod 755 "$INSTALL_DIR"
    chmod -R 755 "$INSTALL_DIR/prometheus" "$INSTALL_DIR/grafana"
}

# Writes the Prometheus configuration, the Prometheus alert rules and the
# Grafana provisioning files below INSTALL_DIR, then sets them to mode 644.
# The here-documents use a quoted delimiter, so "$labels" and "{{ }}" are
# written literally.
# Globals: INSTALL_DIR (read)
create_configuration_files() {
    log_step "5/7" "Creating configuration files"
    
    # Prometheus configuration
    cat > "$INSTALL_DIR/prometheus/prometheus.yml" << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "alert_rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets: []

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'cadvisor'
    static_configs:
      - targets: ['cadvisor:8080']
    scrape_interval: 5s
    metrics_path: /metrics

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
EOF

    # Prometheus alert rules.
    # name!="" keeps the rules on named containers only. The "> 0" filter
    # drops containers without a memory limit: their limit is reported as 0,
    # and dividing by it would yield +Inf and a permanently firing alert.
    cat > "$INSTALL_DIR/prometheus/alert_rules.yml" << 'EOF'
groups:
  - name: docker-containers
    rules:
      - alert: ContainerHighCPUUsage
        expr: rate(container_cpu_usage_seconds_total{name!=""}[5m]) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is above 80% for more than 5 minutes."

      - alert: ContainerHighMemoryUsage
        expr: (container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0)) * 100 > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"
          description: "Container {{ $labels.name }} memory usage is above 90% for more than 5 minutes."

      - alert: ContainerDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.instance }} is down"
          description: "Container {{ $labels.instance }} has been down for more than 1 minute."
EOF

    # Grafana datasource
    cat > "$INSTALL_DIR/grafana/provisioning/datasources/datasource.yml" << 'EOF'
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true
EOF

    # Grafana dashboard provisioning
    cat > "$INSTALL_DIR/grafana/provisioning/dashboards/dashboard.yml" << 'EOF'
apiVersion: 1

providers:
  - name: 'default'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    options:
      path: /var/lib/grafana/dashboards
EOF

    chmod 644 "$INSTALL_DIR"/prometheus/*.yml
    chmod 644 "$INSTALL_DIR"/grafana/provisioning/*/*.yml
}

# Writes the Docker Compose definition of the monitoring stack to
# COMPOSE_FILE (mode 644). The here-document uses a quoted delimiter, so "$$"
# reaches the file literally and Compose reads it as an escaped "$".
# Globals: COMPOSE_FILE (read)
create_docker_compose() {
    log_step "6/7" "Creating Docker Compose configuration"
    
    # Image tags are pinned so repeated installs get the same versions rather
    # than whatever "latest" points to on the day.
    # The dashboards mount provides the directory that the generated Grafana
    # dashboard provider scans (/var/lib/grafana/dashboards); without it that
    # path does not exist inside the container.
    cat > "$COMPOSE_FILE" << 'EOF'
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:v2.45.0
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'

  grafana:
    image: grafana/grafana:10.0.3
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.0
    container_name: cadvisor
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg

  node-exporter:
    image: prom/node-exporter:v1.6.1
    container_name: node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

volumes:
  prometheus_data:
  grafana_data:
EOF

    chmod 644 "$COMPOSE_FILE"
}

# Starts the monitoring stack from INSTALL_DIR and waits briefly for the
# services to come up. The working directory stays INSTALL_DIR afterwards.
# Globals: INSTALL_DIR, USER (read)
start_monitoring_stack() {
    log_step "7/7" "Starting monitoring stack"
    
    cd "$INSTALL_DIR"

    # Right after "usermod -aG docker" the running session does not yet have
    # the new group, so the daemon socket may be unreachable for this user.
    # Fall back to sudo for this run instead of failing the installation.
    local -a docker_cmd=(docker)
    if ! docker info &> /dev/null; then
        log_warning "Docker is not reachable as $USER yet; using sudo for this run"
        docker_cmd=(sudo docker)
    fi
    "${docker_cmd[@]}" compose up -d
    
    # Wait for services to start
    sleep 10
}

# Checks that every monitoring container is running and that Prometheus and
# Grafana answer over HTTP, then prints the access URLs.
# Globals: GREEN, YELLOW, NC (read)
# Returns: exits with status 1 when any check fails
verify_installation() {
    echo -e "${GREEN}Verifying installation...${NC}"
    
    local failed=0

    # The docker group membership may not be active in this session yet.
    local -a docker_cmd=(docker)
    if ! docker info &> /dev/null; then
        docker_cmd=(sudo docker)
    fi

    # Check each container by its fixed container_name: this works from any
    # working directory and catches a single stopped service, which a
    # "grep Up" over the whole listing would miss.
    local name state
    for name in prometheus grafana cadvisor node-exporter; do
        state=$("${docker_cmd[@]}" inspect -f '{{.State.Running}}' "$name" 2> /dev/null) || state=""
        if [ "$state" != "true" ]; then
            log_error "Container $name is not running"
            failed=1
        fi
    done
    
    # -f turns HTTP error statuses into failures; the URL is quoted because
    # "?" is a glob character.
    if ! curl -fs 'http://localhost:9090/api/v1/query?query=up' > /dev/null; then
        log_error "Prometheus is not accessible"
        failed=1
    fi
    
    if ! curl -fs 'http://localhost:3000/api/health' > /dev/null; then
        log_error "Grafana is not accessible"
        failed=1
    fi
    
    if [ "$failed" -eq 0 ]; then
        echo -e "${GREEN}✓ Installation completed successfully!${NC}"
        echo -e "${GREEN}Access URLs:${NC}"
        echo "  Prometheus: http://localhost:9090"
        echo "  Grafana: http://localhost:3000 (admin/admin)"
        echo "  cAdvisor: http://localhost:8080"
        echo "  Node Exporter: http://localhost:9100"
        echo
        echo -e "${YELLOW}Note: You may need to logout and login again for Docker group membership to take effect${NC}"
    else
        exit 1
    fi
}

# Entry point: rejects any command-line argument via usage, then runs the
# installation steps in order.
main() {
    [ $# -eq 0 ] || usage

    check_prerequisites
    detect_distro

    # System preparation
    update_system
    install_docker
    install_docker_compose

    # Configuration
    create_directory_structure
    create_configuration_files
    create_docker_compose

    # Launch and verification
    start_monitoring_stack
    verify_installation
}

main "$@"

Review the script before running. Execute with: bash install.sh

#docker #prometheus #grafana #cadvisor #monitoring

Monitor Docker containers with Prometheus and Grafana using cAdvisor for comprehensive metrics collection