Set up comprehensive Prometheus alerting rules for monitoring cgroup resource usage with automated threshold alerts for CPU, memory, and I/O limits. Configure Alertmanager notifications for container resource exhaustion and system health monitoring.
Prerequisites
- Prometheus server installed and configured
- Docker or container runtime
- Root access for cgroup configuration
- Basic understanding of YAML syntax
What this solves
Container workloads can consume system resources unpredictably, leading to performance degradation or service outages when resource limits are exceeded. This tutorial configures Prometheus to monitor cgroup metrics and creates alerting rules that notify you before containers exhaust their allocated CPU, memory, or I/O resources. You'll set up proactive monitoring that catches resource pressure early and integrates with Alertmanager for automated notifications via email and Slack.
Prerequisites
Before starting, ensure you have a working Prometheus setup and cgroups v2 enabled on your system. This tutorial builds on container resource management concepts covered in our Linux cgroups v2 configuration guide.
Step-by-step configuration
Install cAdvisor for container metrics
cAdvisor collects container resource usage and performance characteristics. Install it to expose cgroup metrics to Prometheus.
# Refresh the package index, then install Docker and start it now and at boot.
sudo apt update
sudo apt install --yes docker.io
sudo systemctl enable --now docker

# Start cAdvisor detached and publish its metrics endpoint on port 8080.
# Host paths are mounted read-only into the container.
sudo docker run --detach \
  --name cadvisor \
  --restart always \
  -p 8080:8080 \
  -v /:/rootfs:ro \
  -v /var/run:/var/run:ro \
  -v /sys:/sys:ro \
  -v /var/lib/docker/:/var/lib/docker:ro \
  -v /dev/disk/:/dev/disk:ro \
  --privileged \
  --device /dev/kmsg \
  gcr.io/cadvisor/cadvisor:v0.47.0
Install node_exporter for system metrics
Node_exporter provides detailed system-level cgroup metrics that complement cAdvisor's container-focused data.
# Fetch the node_exporter 1.6.1 release tarball into /tmp and unpack it.
cd /tmp
wget https://github.com/prometheus/node_exporter/releases/download/v1.6.1/node_exporter-1.6.1.linux-amd64.tar.gz
tar -xzvf node_exporter-1.6.1.linux-amd64.tar.gz

# Move the binary onto the system PATH and create a system account
# (shell /bin/false) for the service to run as.
sudo mv node_exporter-1.6.1.linux-amd64/node_exporter /usr/local/bin/
sudo useradd --system --shell /bin/false node_exporter
# /etc/systemd/system/node_exporter.service
# Runs node_exporter under the dedicated node_exporter account.
[Unit]
Description=Node Exporter
After=network.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
# The cgroups collector is disabled by default; without --collector.cgroups
# node_exporter exposes no node_cgroups_* metrics.
ExecStart=/usr/local/bin/node_exporter --collector.systemd --collector.processes --collector.cgroups
# Bring the exporter back if it exits abnormally.
Restart=on-failure

[Install]
WantedBy=multi-user.target
# Have systemd re-read its unit files, then enable the exporter at boot and start it.
sudo systemctl daemon-reload
sudo systemctl enable node_exporter
sudo systemctl start node_exporter
Configure Prometheus scrape targets
Add cAdvisor and node_exporter as scrape targets in your Prometheus configuration to collect cgroup metrics.
# /etc/prometheus/prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Every rules file must be listed here or Prometheus never evaluates it.
# Relative paths are resolved against the directory of this file.
rule_files:
  - "cgroup_alerts.yml"
  - "container_alerts.yml"
  - "advanced_cgroup_alerts.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "localhost:9093"

scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "localhost:9090"

  # System-level metrics; scraped more often than the global default.
  - job_name: "node_exporter"
    scrape_interval: 10s
    metrics_path: /metrics
    static_configs:
      - targets:
          - "localhost:9100"

  # Per-container metrics from cAdvisor.
  - job_name: "cadvisor"
    scrape_interval: 10s
    metrics_path: /metrics
    static_configs:
      - targets:
          - "localhost:8080"
Create cgroup alerting rules
Define alerting rules that monitor cgroup resource usage and trigger when containers approach their limits.
# /etc/prometheus/cgroup_alerts.yml
# Alerts for containers approaching their memory, CPU and I/O limits,
# based on the container_* metrics scraped from cAdvisor.
groups:
  - name: cgroup_resource_alerts
    rules:
      # Memory in use as a fraction (0-1) of the container's limit. The value
      # is kept as a ratio because humanizePercentage multiplies by 100
      # itself. "> 0" on the limit drops containers that have no memory
      # limit; their limit of 0 would turn the division into +Inf and fire
      # the alert permanently.
      - alert: CgroupMemoryUsageHigh
        expr: |
          container_memory_usage_bytes{name!=""}
            / (container_spec_memory_limit_bytes{name!=""} > 0)
            > 0.85
        for: 5m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} memory usage is high"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} is using {{ $value | humanizePercentage }} of its memory limit for more than 5 minutes."

      # Same ratio as above with a tighter threshold and a shorter hold time.
      - alert: CgroupMemoryUsageCritical
        expr: |
          container_memory_usage_bytes{name!=""}
            / (container_spec_memory_limit_bytes{name!=""} > 0)
            > 0.95
        for: 2m
        labels:
          severity: critical
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} memory usage is critical"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} is using {{ $value | humanizePercentage }} of its memory limit. OOM kill is imminent."

      # rate() of CPU seconds is measured in cores, so 0.8 means 80% of one
      # core; the ratio feeds humanizePercentage directly.
      - alert: CgroupCPUUsageHigh
        expr: |
          rate(container_cpu_usage_seconds_total{name!=""}[5m]) > 0.8
        for: 10m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} CPU usage is high"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} CPU usage is {{ $value | humanizePercentage }} for more than 10 minutes."

      # Seconds spent doing I/O per second of wall-clock time.
      - alert: CgroupIOWaitHigh
        expr: |
          rate(container_fs_io_time_seconds_total{name!=""}[5m]) > 0.5
        for: 5m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} I/O wait time is high"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} has high I/O wait time: {{ $value }}s over the last 5 minutes."

      # cAdvisor counts OOM kills in container_oom_events_total.
      # container_memory_failures_total carries failure_type/scope labels
      # (page faults), so a type="oom" selector on it matches nothing.
      - alert: CgroupMemoryOOMKilled
        expr: |
          increase(container_oom_events_total{name!=""}[1m]) > 0
        for: 0m
        labels:
          severity: critical
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} was OOM killed"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} was killed due to out of memory condition."
Create container resource threshold alerts
Add specific alerting rules for container resource limits and system-wide cgroup pressure monitoring.
# /etc/prometheus/container_alerts.yml
# Host-level memory/cgroup health plus per-container restart, throttling,
# disk and network alerts.
groups:
  - name: container_resource_alerts
    rules:
      # Fraction (0-1) of host memory in use; kept as a ratio because
      # humanizePercentage multiplies by 100 itself.
      - alert: SystemMemoryPressure
        expr: |
          (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > 0.9
        for: 5m
        labels:
          severity: critical
          service: system_monitoring
        annotations:
          summary: "System memory pressure is high"
          description: "System memory usage on {{ $labels.instance }} is {{ $value | humanizePercentage }}. Available memory is critically low."

      # node_cgroups_enabled comes from node_exporter's cgroups collector,
      # which is off by default and must be started with --collector.cgroups.
      - alert: CgroupControllerDisabled
        expr: |
          node_cgroups_enabled{subsys_name="memory"} == 0
        for: 0m
        labels:
          severity: warning
          service: system_monitoring
        annotations:
          summary: "Cgroup memory controller is disabled"
          description: "Memory cgroup controller is disabled on {{ $labels.instance }}. Container memory limits will not be enforced."

      # container_start_time_seconds is a gauge holding a timestamp, so
      # increase() (meant for counters) does not count restarts; changes()
      # counts how many times the start time changed within the window.
      - alert: ContainerRestartingFrequently
        expr: |
          changes(container_start_time_seconds{name!=""}[1h]) > 5
        for: 0m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} is restarting frequently"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} has restarted {{ $value }} times in the last hour."

      - alert: CgroupCPUThrottling
        expr: |
          increase(container_cpu_cfs_throttled_seconds_total{name!=""}[5m]) > 0
        for: 5m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} CPU is being throttled"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} has been CPU throttled for {{ $value }}s in the last 5 minutes."

      # Combined read + write throughput above 50 MB/s.
      # Prometheus templates have no humanizeBytes function; humanize1024
      # yields the binary prefix (Ki/Mi/Gi) and the unit is appended by hand.
      - alert: CgroupBlockIOHigh
        expr: |
          rate(container_fs_reads_bytes_total{name!=""}[5m])
            + rate(container_fs_writes_bytes_total{name!=""}[5m])
            > 50000000
        for: 10m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} has high disk I/O"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} disk I/O is {{ $value | humanize1024 }}B/s for more than 10 minutes."

      # Inbound traffic above 10 MB/s.
      - alert: CgroupNetworkRxHigh
        expr: |
          rate(container_network_receive_bytes_total{name!=""}[5m]) > 10000000
        for: 5m
        labels:
          severity: info
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} high network receive traffic"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} is receiving {{ $value | humanize1024 }}B/s network traffic."
Install and configure Alertmanager
Set up Alertmanager to handle alert routing and notifications from Prometheus.
# Debian/Ubuntu ship Alertmanager as "prometheus-alertmanager"; the
# distribution repositories have no package called "alertmanager".
# The packaged systemd unit carries the same prometheus-alertmanager name.
sudo apt install -y prometheus-alertmanager
# /etc/alertmanager/alertmanager.yml
global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alerts@example.com'
  smtp_auth_username: 'alerts@example.com'
  # Placeholder: keep the real password out of version control.
  smtp_auth_password: 'your-smtp-password'

# Alerts go to the webhook by default; critical and warning alerts are
# routed to their own receivers by severity label.
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
  routes:
    - match:
        severity: critical
      receiver: 'critical-alerts'
      repeat_interval: 15m
    - match:
        severity: warning
      receiver: 'warning-alerts'
      repeat_interval: 1h

receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'

  - name: 'critical-alerts'
    email_configs:
      - to: 'admin@example.com'
        # email_configs has no "subject" or "body" keys and Alertmanager
        # rejects unknown fields: the subject is set as a mail header and
        # the plain-text body goes in "text".
        headers:
          Subject: 'CRITICAL: {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
        text: |
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Description: {{ .Annotations.description }}
          Instance: {{ .Labels.instance }}
          Severity: {{ .Labels.severity }}
          {{ end }}
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#alerts'
        title: 'Critical Container Alert'
        text: '{{ range .Alerts }}{{ .Annotations.summary }}: {{ .Annotations.description }}{{ end }}'

  - name: 'warning-alerts'
    email_configs:
      - to: 'team@example.com'
        headers:
          Subject: 'WARNING: {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
        text: |
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Description: {{ .Annotations.description }}
          Instance: {{ .Labels.instance }}
          {{ end }}

inhibit_rules:
  # Suppress a warning while a critical alert is firing for the same
  # container. Warning and critical alerts use different alertnames
  # (e.g. CgroupMemoryUsageHigh vs CgroupMemoryUsageCritical), so requiring
  # an equal alertname would never inhibit anything; match on the
  # instance and container name instead.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['instance', 'name']
Configure advanced cgroup monitoring
Set up additional monitoring rules for cgroup subsystem health and resource controller status.
# /etc/prometheus/advanced_cgroup_alerts.yml
# Exporter availability, memory growth and swap alerts, plus recording rules
# that precompute the most common container ratios.
groups:
  - name: advanced_cgroup_monitoring
    rules:
      - alert: CgroupSubsystemUnavailable
        expr: |
          up{job="node_exporter"} == 0
        for: 1m
        labels:
          severity: critical
          service: monitoring
        annotations:
          summary: "Cgroup monitoring is down"
          description: "Node exporter on {{ $labels.instance }} is down. Cgroup metrics are not being collected."

      # Memory usage is a gauge, so growth over the window is delta();
      # increase() is only meaningful for counters. Threshold: 100 MB/hour.
      # humanize1024 replaces humanizeBytes, which is not a Prometheus
      # template function.
      - alert: CgroupMemoryLeakDetection
        expr: |
          delta(container_memory_usage_bytes{name!=""}[1h]) > 100000000
        for: 0m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Potential memory leak in container {{ $labels.name }}"
          description: "Container {{ $labels.name }} memory usage increased by {{ $value | humanize1024 }}B in the last hour."

      # Swap in use as a fraction (0-1) of the swap limit; humanizePercentage
      # multiplies by 100 itself. "> 0" skips containers without a limit,
      # which would otherwise divide by zero.
      - alert: CgroupSwapUsageHigh
        expr: |
          container_memory_swap{name!=""}
            / (container_spec_memory_swap_limit_bytes{name!=""} > 0)
            > 0.5
        for: 5m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} swap usage is high"
          description: "Container {{ $labels.name }} is using {{ $value | humanizePercentage }} of its swap limit."

      # Recorded as a percentage (0-100), as the rule name states. Containers
      # without a memory limit are left out rather than recorded as +Inf.
      - record: cgroup:memory_utilization_percent
        expr: |
          (
            container_memory_usage_bytes{name!=""}
              / (container_spec_memory_limit_bytes{name!=""} > 0)
          ) * 100

      # CPU usage in cores, averaged over 5 minutes.
      - record: cgroup:cpu_utilization_rate
        expr: |
          rate(container_cpu_usage_seconds_total{name!=""}[5m])

      # Transmit plus receive throughput in bytes per second.
      - record: cgroup:network_io_bytes_per_second
        expr: |
          rate(container_network_transmit_bytes_total{name!=""}[5m])
            + rate(container_network_receive_bytes_total{name!=""}[5m])
Enable and restart services
Restart Prometheus and Alertmanager to load the new configuration and alerting rules.
# Fix ownership and permissions first, so Prometheus can read the new files
# when it is restarted.
sudo chown prometheus:prometheus /etc/prometheus/*.yml
sudo chmod 644 /etc/prometheus/*.yml

# "enable --now" leaves an already-running service untouched, so restart
# Alertmanager explicitly to make it load the new configuration as well.
# If Alertmanager came from the Debian/Ubuntu package, its unit is named
# prometheus-alertmanager; substitute that name below.
sudo systemctl enable alertmanager
sudo systemctl restart alertmanager prometheus
sudo systemctl status prometheus alertmanager
Test alerting rules
Validate your alerting rules syntax and test alert generation with resource pressure simulation.
# Validate the syntax of each rules file.
promtool check rules /etc/prometheus/cgroup_alerts.yml
promtool check rules /etc/prometheus/container_alerts.yml
promtool check rules /etc/prometheus/advanced_cgroup_alerts.yml

# Test memory pressure alert
# There is no image called "stress" on Docker Hub; polinux/stress provides
# the stress binary. Allocating 150M under a 100m limit gets the container
# OOM-killed, so expect CgroupMemoryOOMKilled rather than the usage alerts,
# which need the condition to hold for minutes.
docker run --rm --memory=100m --memory-swap=100m polinux/stress stress --vm 1 --vm-bytes 150M --timeout 60s

# Check active alerts
curl -s http://localhost:9090/api/v1/alerts | jq '.data.alerts[] | select(.state=="firing") | {alertname: .labels.alertname, instance: .labels.instance}'
Configure notification channels
Set up Slack notifications
Configure Slack webhook integration for real-time container resource alerts.
# Create Slack webhook (external step)
1. Go to https://api.slack.com/messaging/webhooks
2. Create new webhook for your workspace
3. Copy webhook URL to alertmanager.yml
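Update the Slack configuration in /etc/alertmanager/alertmanager.yml with your webhook URL (if Alertmanager was installed from the Debian/Ubuntu prometheus-alertmanager package, its configuration file is /etc/prometheus/alertmanager.yml instead):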
# Slack notification settings for a receiver in alertmanager.yml.
# Replace api_url with the webhook URL created for your workspace.
slack_configs:
  - send_resolved: true
    api_url: "https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX"
    channel: "#container-alerts"
    username: "Prometheus"
    icon_emoji: ":warning:"
    title: "Container Resource Alert - {{ .GroupLabels.alertname }}"
    # One paragraph per alert in the notification group.
    text: |
      {{ range .Alerts }}
      Alert: {{ .Annotations.summary }}
      Description: {{ .Annotations.description }}
      Instance: {{ .Labels.instance }}
      Container: {{ .Labels.name }}
      Severity: {{ .Labels.severity }}
      {{ end }}
Configure PagerDuty integration
Set up PagerDuty for critical container alerts requiring immediate attention.
# Receiver for critical alerts: pages through PagerDuty and also sends mail.
receivers:
  - name: 'critical-alerts'
    pagerduty_configs:
      - service_key: 'YOUR_PAGERDUTY_SERVICE_KEY'
        description: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
        # Extra key/value pairs attached to the PagerDuty incident.
        details:
          alert_count: '{{ len .Alerts }}'
          instance: '{{ range .Alerts }}{{ .Labels.instance }}{{ end }}'
          container: '{{ range .Alerts }}{{ .Labels.name }}{{ end }}'
          severity: '{{ range .Alerts }}{{ .Labels.severity }}{{ end }}'
    email_configs:
      - to: 'oncall@example.com'
        # email_configs has no "subject" or "body" keys and Alertmanager
        # rejects unknown fields: the subject is set as a mail header and
        # the plain-text body goes in "text".
        headers:
          Subject: 'CRITICAL Container Alert'
        text: |
          Critical container resource alert triggered.
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Container: {{ .Labels.name }}
          Instance: {{ .Labels.instance }}
          Description: {{ .Annotations.description }}
          {{ end }}
Verify your setup
# Check Prometheus targets (prints only targets that are NOT healthy;
# empty output means every target is up)
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[] | select(.health != "up") | {job: .labels.job, health: .health}'

# Verify alerting rules are loaded
curl -s http://localhost:9090/api/v1/rules | jq '.data.groups[] | {name: .name, rules: (.rules | length)}'

# Check Alertmanager status
# Uses API v2: the v1 API has been removed from current Alertmanager
# releases, and v2 returns the status object directly (no "data" wrapper).
curl -s http://localhost:9093/api/v2/status | jq '.'

# Test cAdvisor metrics
curl -s http://localhost:8080/metrics | grep container_memory_usage_bytes | head -5

# Verify node_exporter cgroup metrics
curl -s http://localhost:9100/metrics | grep node_cgroup
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| No container metrics in Prometheus | cAdvisor not running or accessible | sudo docker logs cadvisor and check port 8080 |
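| Alerting rules not loading | YAML syntax error in rules file | promtool check config /etc/prometheus/prometheus.yml (this also validates every rule file listed under rule_files; running promtool check rules on /etc/prometheus/*.yml would wrongly try to parse prometheus.yml as a rules file) |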
| Alerts not firing despite high usage | Missing or incorrect metric labels | Check container_memory_usage_bytes{name!=""} query in Prometheus UI |
| Alertmanager not receiving alerts | Prometheus alertmanager config incorrect | Verify localhost:9093 in prometheus.yml alerting section |
| Email notifications not working | SMTP configuration error | Check SMTP credentials and sudo systemctl status alertmanager |
| Permission denied on config files | Incorrect file ownership | sudo chown prometheus:prometheus /etc/prometheus/*.yml |
Advanced configuration
For production environments, consider implementing additional monitoring patterns covered in our Kubernetes Prometheus Operator guide for orchestrated container workloads, or explore centralized logging integration with our Grafana Loki alerting tutorial.
You can also extend this setup with custom exporters for application-specific metrics and integrate with service discovery for dynamic container monitoring in cluster environments.
Next steps
- Create Grafana dashboards for container metrics visualization
- Implement Prometheus recording rules for cgroup metrics aggregation
- Set up Alertmanager webhook integration with incident management systems
- Configure Prometheus federation for multi-cluster container monitoring
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Installs cAdvisor and node_exporter, then writes a Prometheus configuration
# and alerting rule files for container monitoring.
# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -euo pipefail
# Colors for output (escape sequences, interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
# Resets the terminal colour
NC='\033[0m'
# Configuration
# Directory that receives prometheus.yml and the rule files (override with -p)
PROMETHEUS_CONFIG_DIR="/etc/prometheus"
# Account that is given ownership of the configuration directory
PROMETHEUS_USER="prometheus"
# Release versions of the two exporters that get installed
CADVISOR_VERSION="v0.47.0"
NODE_EXPORTER_VERSION="1.6.1"
# Error handler: announce the failure, stop node_exporter, remove the
# cAdvisor container and exit with status 1. Each step is best-effort.
cleanup() {
    printf '%b\n' "${RED}Installation failed. Cleaning up...${NC}"
    systemctl stop node_exporter 2>/dev/null || :
    docker stop cadvisor 2>/dev/null || :
    docker rm cadvisor 2>/dev/null || :
    exit 1
}

# Run the handler whenever a command fails.
trap cleanup ERR
# Print the command-line synopsis and exit.
#   $1 - exit status to use (optional, default 1 so that calls reporting
#        an invalid option keep failing as before)
usage() {
    echo "Usage: $0 [OPTIONS]"
    echo "Options:"
    echo " -p PATH Prometheus config directory (default: /etc/prometheus)"
    echo " -h Show this help"
    exit "${1:-1}"
}

# Parse arguments
while getopts "p:h" opt; do
    case $opt in
        p) PROMETHEUS_CONFIG_DIR="$OPTARG" ;;
        # Help that was asked for is not an error, so exit with status 0.
        h) usage 0 ;;
        # Unknown option or missing argument.
        *) usage ;;
    esac
done
# The steps below install packages and write system files, so refuse to
# continue unless the effective user is root.
if (( EUID != 0 )); then
    printf '%b\n' "${RED}This script must be run as root${NC}"
    exit 1
fi
# Detect distribution
# /etc/os-release provides ID, which selects the package manager and the
# commands used to refresh repository metadata and to install packages.
if [ -f /etc/os-release ]; then
    . /etc/os-release
    # ID is defaulted so that `set -u` does not abort on a file without it.
    case "${ID:-unknown}" in
        ubuntu|debian)
            PKG_MGR="apt"
            PKG_UPDATE="apt update"
            PKG_INSTALL="apt install -y"
            ;;
        almalinux|rocky|centos|rhel|ol|fedora)
            PKG_MGR="dnf"
            # Refresh metadata only, like "apt update" does. "dnf update"
            # would upgrade every installed package on the system.
            PKG_UPDATE="dnf -y makecache"
            PKG_INSTALL="dnf install -y"
            ;;
        amzn)
            PKG_MGR="yum"
            # Metadata refresh only; see the dnf branch.
            PKG_UPDATE="yum -y makecache"
            PKG_INSTALL="yum install -y"
            ;;
        *)
            echo -e "${RED}Unsupported distro: ${ID:-unknown}${NC}"
            exit 1
            ;;
    esac
else
    echo -e "${RED}Cannot detect distribution${NC}"
    exit 1
fi
# Step 1: run the distribution's repository refresh command.
echo -e "${GREEN}[1/8] Updating package repositories${NC}"
$PKG_UPDATE

# Step 2: install Docker unless a docker binary is already on the PATH.
# The package is docker.io for apt and docker for dnf/yum.
echo -e "${GREEN}[2/8] Installing Docker${NC}"
if ! command -v docker >/dev/null 2>&1; then
    if [[ "$PKG_MGR" == "apt" ]]; then
        $PKG_INSTALL docker.io
    elif [[ "$PKG_MGR" == "dnf" || "$PKG_MGR" == "yum" ]]; then
        $PKG_INSTALL docker
    fi
else
    echo -e "${YELLOW}Docker already installed${NC}"
fi

# Start the daemon now and at boot, and add root to the docker group.
systemctl enable --now docker
usermod -aG docker root
# Step 3: replace any existing "cadvisor" container with a fresh one.
echo -e "${GREEN}[3/8] Installing cAdvisor container${NC}"

# Neither command may abort the script when no such container exists.
docker stop cadvisor 2>/dev/null || :
docker rm cadvisor 2>/dev/null || :

# Detached, restarted automatically, metrics published on port 8080;
# host paths are mounted read-only.
docker run --detach \
    --name cadvisor \
    --restart always \
    -p 8080:8080 \
    -v /:/rootfs:ro \
    -v /var/run:/var/run:ro \
    -v /sys:/sys:ro \
    -v /var/lib/docker/:/var/lib/docker:ro \
    -v /dev/disk/:/dev/disk:ro \
    --privileged \
    --device /dev/kmsg \
    "gcr.io/cadvisor/cadvisor:${CADVISOR_VERSION}"
# Step 4: download the node_exporter release and install the binary.
echo -e "${GREEN}[4/8] Installing node_exporter${NC}"

# Map the machine architecture to the suffix used in release file names
# instead of assuming amd64.
case "$(uname -m)" in
    x86_64)
        NE_ARCH="amd64"
        ;;
    aarch64|arm64)
        NE_ARCH="arm64"
        ;;
    *)
        echo -e "${RED}Unsupported architecture: $(uname -m)${NC}"
        exit 1
        ;;
esac
NE_NAME="node_exporter-${NODE_EXPORTER_VERSION}.linux-${NE_ARCH}"

# Work in a private temporary directory: nothing is left behind in /tmp, no
# unrelated /tmp/node_exporter-* files get deleted, and the script's working
# directory stays unchanged.
NE_TMP="$(mktemp -d)"
wget -q -O "${NE_TMP}/${NE_NAME}.tar.gz" \
    "https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/${NE_NAME}.tar.gz"
tar -xzf "${NE_TMP}/${NE_NAME}.tar.gz" -C "${NE_TMP}"
install -m 755 "${NE_TMP}/${NE_NAME}/node_exporter" /usr/local/bin/node_exporter
rm -rf "${NE_TMP}"

# Dedicated system account (shell /bin/false) for the service.
if ! id "node_exporter" &>/dev/null; then
    useradd -rs /bin/false node_exporter
fi
# Write the systemd unit for node_exporter. The heredoc delimiter is quoted,
# so the unit text is written verbatim.
cat > /etc/systemd/system/node_exporter.service << 'EOF'
[Unit]
Description=Node Exporter
After=network.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
# The cgroups collector is disabled by default; without --collector.cgroups
# node_exporter exposes no node_cgroups_* metrics.
ExecStart=/usr/local/bin/node_exporter --collector.systemd --collector.processes --collector.cgroups
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
# "enable --now" does not touch a service that is already running, so a
# re-run would keep the old binary and unit; restart unconditionally.
systemctl enable node_exporter
systemctl restart node_exporter
# Step 5: make sure the configuration directory and its owning account exist.
echo -e "${GREEN}[5/8] Creating Prometheus directories${NC}"
mkdir -p "$PROMETHEUS_CONFIG_DIR"

# Create the account only when it is missing.
id "$PROMETHEUS_USER" &>/dev/null || useradd -rs /bin/false "$PROMETHEUS_USER"

chown -R "$PROMETHEUS_USER:$PROMETHEUS_USER" "$PROMETHEUS_CONFIG_DIR"
# Step 6: write prometheus.yml with the exporter scrape jobs and rule files.
echo -e "${GREEN}[6/8] Creating Prometheus configuration${NC}"

# Keep a timestamped copy of an existing configuration instead of silently
# destroying it.
if [[ -f "$PROMETHEUS_CONFIG_DIR/prometheus.yml" ]]; then
    cp -p "$PROMETHEUS_CONFIG_DIR/prometheus.yml" \
        "$PROMETHEUS_CONFIG_DIR/prometheus.yml.bak.$(date +%Y%m%d%H%M%S)"
fi

# Quoted delimiter: the YAML is written verbatim, with no shell expansion.
cat > "$PROMETHEUS_CONFIG_DIR/prometheus.yml" << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "cgroup_alerts.yml"
  - "container_alerts.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "localhost:9093"

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node_exporter'
    static_configs:
      - targets: ['localhost:9100']
    scrape_interval: 10s
    metrics_path: /metrics

  - job_name: 'cadvisor'
    static_configs:
      - targets: ['localhost:8080']
    scrape_interval: 10s
    metrics_path: /metrics
EOF
# Step 7a: write the container memory/CPU alerting rules.
echo -e "${GREEN}[7/8] Creating alerting rules${NC}"

# Quoted delimiter: $labels and $value must reach the file unexpanded.
cat > "$PROMETHEUS_CONFIG_DIR/cgroup_alerts.yml" << 'EOF'
groups:
  - name: cgroup_resource_alerts
    rules:
      # Memory in use as a fraction (0-1) of the limit. The value stays a
      # ratio because humanizePercentage multiplies by 100 itself. "> 0" on
      # the limit drops containers without a memory limit, whose limit of 0
      # would turn the division into +Inf and fire the alert permanently.
      - alert: CgroupMemoryUsageHigh
        expr: |
          container_memory_usage_bytes{name!=""}
            / (container_spec_memory_limit_bytes{name!=""} > 0)
            > 0.85
        for: 5m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} memory usage is high"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} is using {{ $value | humanizePercentage }} of its memory limit for more than 5 minutes."

      - alert: CgroupMemoryUsageCritical
        expr: |
          container_memory_usage_bytes{name!=""}
            / (container_spec_memory_limit_bytes{name!=""} > 0)
            > 0.95
        for: 2m
        labels:
          severity: critical
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} memory usage is critical"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} is using {{ $value | humanizePercentage }} of its memory limit."

      # Here the value is already scaled to percent of one core and is
      # printed as-is, so no humanizePercentage.
      - alert: CgroupCPUUsageHigh
        expr: |
          rate(container_cpu_usage_seconds_total{name!=""}[5m]) * 100 > 80
        for: 10m
        labels:
          severity: warning
          service: container_monitoring
        annotations:
          summary: "Container {{ $labels.name }} CPU usage is high"
          description: "Container {{ $labels.name }} on {{ $labels.instance }} is using {{ $value }}% CPU for more than 10 minutes."
EOF
# Step 7b: write the container liveness rules.
cat > "$PROMETHEUS_CONFIG_DIR/container_alerts.yml" << 'EOF'
groups:
  - name: container_alerts
    rules:
      # Fires when a container's last-seen timestamp is more than a minute old.
      - alert: ContainerKilled
        expr: time() - container_last_seen > 60
        for: 0m
        labels:
          severity: critical
          service: container_monitoring
        annotations:
          summary: "Container killed"
          description: "A container has disappeared for more than 1 minute."

      # "up" is 0 while the target is configured but failing to scrape, and
      # is absent only when the job is missing altogether. absent() alone
      # therefore stays silent when cAdvisor is simply down; cover both.
      - alert: ContainerAbsent
        expr: up{job="cadvisor"} == 0 or absent(up{job="cadvisor"})
        for: 5m
        labels:
          severity: critical
          service: container_monitoring
        annotations:
          summary: "Container monitoring service down"
          description: "cAdvisor is down or has disappeared from Prometheus service discovery."
EOF
# Hand the generated files to the Prometheus account, world-readable.
chown -R "$PROMETHEUS_USER:$PROMETHEUS_USER" "$PROMETHEUS_CONFIG_DIR"
chmod 644 "$PROMETHEUS_CONFIG_DIR"/*.yml

# Step 8: confirm both exporters are running and serving metrics.
echo -e "${GREEN}[8/8] Verifying installation${NC}"
# Give the exporters a moment to start listening.
sleep 5

if ! systemctl is-active --quiet node_exporter; then
    echo -e "${RED}node_exporter is not running${NC}"
    exit 1
fi

# Ask Docker about the container named "cadvisor" itself. Grepping the
# output of `docker ps` would also match any other container whose name or
# image merely contains the word.
if [[ "$(docker inspect -f '{{.State.Running}}' cadvisor 2>/dev/null)" != "true" ]]; then
    echo -e "${RED}cAdvisor container is not running${NC}"
    exit 1
fi

# -f makes curl return non-zero on HTTP error statuses; with -s alone any
# HTTP response, including an error page, counts as success.
if ! curl -sf http://localhost:9100/metrics >/dev/null; then
    echo -e "${RED}node_exporter metrics endpoint is not accessible${NC}"
    exit 1
fi

if ! curl -sf http://localhost:8080/metrics >/dev/null; then
    echo -e "${RED}cAdvisor metrics endpoint is not accessible${NC}"
    exit 1
fi

echo -e "${GREEN}Installation completed successfully!${NC}"
echo -e "${YELLOW}Configuration files created in: $PROMETHEUS_CONFIG_DIR${NC}"
echo -e "${YELLOW}Services status:${NC}"
echo " - node_exporter: http://localhost:9100/metrics"
echo " - cAdvisor: http://localhost:8080/metrics"
echo -e "${YELLOW}Please restart Prometheus to load the new configuration.${NC}"
Review the script before running. The script must be run as root, so execute it with: sudo bash install.sh