Configure Prometheus Alertmanager to send webhook notifications for Loki log-based alerts with proper routing, templating, and Grafana dashboard integration for comprehensive monitoring.
Prerequisites
- Existing Loki and Grafana installation
- Prometheus server running
- Python 3 for webhook receiver
- Network connectivity between components
What this solves
When running centralized logging with Loki and Grafana, you need reliable alerting for critical log patterns like application errors, security events, or system failures. This tutorial shows you how to configure Prometheus Alertmanager to receive alerts from Loki and route them through webhook notifications to various channels like Slack, Microsoft Teams, or custom endpoints.
Before you begin
Before starting, ensure you have a working Loki and Grafana setup. If you need to set up log collection first, check our guide on configuring NGINX log analysis with Loki and Grafana. You'll also need Prometheus running on your system for Alertmanager integration.
Step-by-step configuration
Install Prometheus Alertmanager
Download and install the latest version of Alertmanager for your system.
sudo apt update
wget https://github.com/prometheus/alertmanager/releases/download/v0.26.0/alertmanager-0.26.0.linux-amd64.tar.gz
tar xzf alertmanager-0.26.0.linux-amd64.tar.gz
sudo mv alertmanager-0.26.0.linux-amd64/alertmanager /usr/local/bin/
sudo mv alertmanager-0.26.0.linux-amd64/amtool /usr/local/bin/
Create Alertmanager user and directories
Set up a dedicated user and directory structure for Alertmanager with proper permissions.
sudo useradd --no-create-home --shell /bin/false alertmanager
sudo mkdir -p /etc/alertmanager /var/lib/alertmanager
sudo chown alertmanager:alertmanager /etc/alertmanager /var/lib/alertmanager
sudo chmod 755 /etc/alertmanager /var/lib/alertmanager
Configure Alertmanager with webhook routing
Create the main Alertmanager configuration with webhook receivers for different alert types.
# /etc/alertmanager/alertmanager.yml
#
# NOTE: Alertmanager parses its configuration strictly -- any unknown field
# causes a load failure at startup. webhook_configs supports only url,
# send_resolved, http_config and max_alerts; it does NOT accept title/text
# (those belong to slack_configs). The webhook payload is a fixed JSON
# document; format the message in the receiving endpoint instead.
global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alertmanager@example.com'
  resolve_timeout: 5m

route:
  group_by: ['alertname', 'cluster', 'service']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'default-webhook'
  routes:
    # Critical alerts: notify faster and repeat more often.
    - match:
        severity: critical
      receiver: 'critical-webhook'
      group_wait: 5s
      repeat_interval: 30m
    # Route the Loki log-volume alert to its dedicated endpoint.
    - match:
        alertname: LokiLogError
      receiver: 'log-error-webhook'
      group_wait: 10s
      repeat_interval: 15m
    # Regex route for web-server services.
    - match_re:
        service: ^(nginx|apache)$
      receiver: 'webserver-webhook'

receivers:
  - name: 'default-webhook'
    webhook_configs:
      - url: 'http://203.0.113.10:9093/webhook/default'
        send_resolved: true
        http_config:
          basic_auth:
            username: 'webhook_user'
            password: 'secure_webhook_password'
  - name: 'critical-webhook'
    webhook_configs:
      - url: 'http://203.0.113.10:9093/webhook/critical'
        send_resolved: true
        http_config:
          basic_auth:
            username: 'webhook_user'
            password: 'secure_webhook_password'
  - name: 'log-error-webhook'
    webhook_configs:
      - url: 'http://203.0.113.10:9093/webhook/logs'
        send_resolved: true
  - name: 'webserver-webhook'
    webhook_configs:
      - url: 'http://203.0.113.10:9093/webhook/webserver'
        send_resolved: true
Configure Slack webhook integration
Add Slack webhook configuration for team notifications.
# Add this to your alertmanager.yml receivers section
- name: 'slack-notifications'
  slack_configs:
    - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
      channel: '#alerts'
      username: 'Alertmanager'
      icon_emoji: ':warning:'
      # Unlike webhook_configs, slack_configs supports Go-templated
      # title/text fields rendered per notification group.
      title: 'Alert: {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
      text: |
        {{ range .Alerts }}
        Alert: {{ .Annotations.summary }}
        Description: {{ .Annotations.description }}
        Severity: {{ .Labels.severity }}
        Instance: {{ .Labels.instance }}
        {{ if .Labels.service }}Service: {{ .Labels.service }}{{ end }}
        {{ end }}
      send_resolved: true
Set up Loki log alert rules
Create LogQL-based alert rules that Loki will evaluate and send to Alertmanager.
# Loki ruler rule file (place under /etc/loki/rules/<tenant>/, e.g. fake/ for
# single-tenant mode). LogQL metric queries with Prometheus-style alert syntax.
groups:
  - name: log_alerts
    rules:
      # Per-instance nginx error rate above 0.1 errors/sec for 2 minutes.
      - alert: HighErrorRate
        expr: |
          sum(rate({job="nginx"} |= "error" [5m])) by (instance) > 0.1
        for: 2m
        labels:
          severity: warning
          service: nginx
        annotations:
          summary: "High error rate detected on {{ $labels.instance }}"
          description: "Error rate is {{ $value }} errors per second on {{ $labels.instance }}"
          runbook_url: "https://wiki.example.com/runbook/nginx-errors"
      # Any CRITICAL/FATAL line fires immediately (for: 0m).
      - alert: CriticalApplicationError
        expr: |
          count_over_time({job="application"} |~ "CRITICAL|FATAL" [10m]) > 0
        for: 0m
        labels:
          severity: critical
          service: application
        annotations:
          summary: "Critical application error detected"
          description: "Found {{ $value }} critical errors in the last 10 minutes"
      # Aggregate ERROR volume across all jobs; routed by alertname above.
      - alert: LokiLogError
        expr: |
          sum(rate({job=~".+"} |= "ERROR" [5m])) > 0.5
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Elevated error log volume"
          description: "Error log rate is {{ $value }} per second across all services"
      # More than 10 auth failures in 15 minutes fires immediately.
      - alert: SecurityEvent
        expr: |
          count_over_time({job="auth"} |~ "failed login|unauthorized access" [15m]) > 10
        for: 0m
        labels:
          severity: critical
          service: security
        annotations:
          summary: "Multiple security events detected"
          description: "{{ $value }} security events in the last 15 minutes"
Configure Loki ruler
Update Loki configuration to enable the ruler component for processing alert rules.
# Add these sections to your existing Loki config
ruler:
  # Rule files are read from local disk (directory laid out per tenant).
  storage:
    type: local
    local:
      directory: /etc/loki/rules
  # Scratch space where the ruler caches evaluated rule files.
  rule_path: /tmp/loki/rules
  # Where firing alerts are pushed; v2 API matches Alertmanager >= 0.16.
  alertmanager_url: http://localhost:9093
  ring:
    kvstore:
      store: inmemory
  # Expose the ruler HTTP API so rules can be listed/managed remotely.
  enable_api: true
  enable_alertmanager_v2: true
  # URL included in alert annotations as the generator link.
  external_url: http://localhost:3100

limits_config:
  ruler_evaluation_delay_duration: 0s
  ruler_max_rules_per_group: 20
  ruler_max_rule_groups_per_tenant: 35
Create systemd service for Alertmanager
Set up Alertmanager as a systemd service for automatic startup and management.
# Systemd unit for Prometheus Alertmanager
# (install as /etc/systemd/system/alertmanager.service).
[Unit]
Description=Alertmanager
# Wait for the network to be actually online, not just configured.
Wants=network-online.target
After=network-online.target
[Service]
# Dedicated unprivileged account created in the earlier setup step.
User=alertmanager
Group=alertmanager
Type=simple
# --cluster.listen-address="" disables HA gossip clustering (single node).
ExecStart=/usr/local/bin/alertmanager \
--config.file=/etc/alertmanager/alertmanager.yml \
--storage.path=/var/lib/alertmanager/ \
--web.external-url=http://localhost:9093 \
--cluster.listen-address="" \
--log.level=info
# Restart automatically if the process exits for any reason.
Restart=always
RestartSec=5
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
Set proper file permissions
Ensure Alertmanager configuration files have correct ownership and permissions.
sudo chown -R alertmanager:alertmanager /etc/alertmanager
sudo chmod 640 /etc/alertmanager/alertmanager.yml
sudo chmod 755 /etc/alertmanager
Configure Grafana data source for Alertmanager
Add Alertmanager as a data source in Grafana for alert visualization.
curl -X POST \
http://admin:admin@localhost:3000/api/datasources \
-H 'Content-Type: application/json' \
-d '{
"name": "Alertmanager",
"type": "alertmanager",
"url": "http://localhost:9093",
"access": "proxy",
"isDefault": false
}'
Start and enable services
Start Alertmanager and enable it to run automatically on system boot.
sudo systemctl daemon-reload
sudo systemctl enable --now alertmanager
sudo systemctl restart loki
Configure webhook endpoints
Create a simple webhook receiver
Set up a basic webhook endpoint to test alert delivery.
#!/usr/bin/env python3
# Minimal Flask webhook receiver for Alertmanager notifications.
# NOTE: Flask is a third-party dependency (installed via pip in a later step).
from flask import Flask, request, jsonify
import json
import logging
from datetime import datetime
app = Flask(__name__)
# INFO level so every received alert payload is written to the log.
logging.basicConfig(level=logging.INFO)
@app.route('/webhook/default', methods=['POST'])
def default_webhook():
    """Receive default-route alerts from Alertmanager and log/print them.

    Expects the standard Alertmanager webhook JSON payload
    ({"alerts": [...], ...}). Returns a 200 JSON ack so Alertmanager
    records the delivery as successful.
    """
    # silent=True returns None instead of raising on a missing/invalid JSON
    # body, so a malformed POST yields an empty alert list rather than a 500.
    data = request.get_json(silent=True) or {}
    logging.info(f"Received default alert: {json.dumps(data, indent=2)}")
    for alert in data.get('alerts', []):
        # Default 'unknown' so a payload without a status can't crash .upper().
        status = alert.get('status', 'unknown')
        labels = alert.get('labels', {})
        annotations = alert.get('annotations', {})
        print(f"[{datetime.now()}] {status.upper()} - {annotations.get('summary', 'No summary')}")
        print(f" Instance: {labels.get('instance', 'unknown')}")
        print(f" Severity: {labels.get('severity', 'unknown')}")
    return jsonify({"status": "received"}), 200
@app.route('/webhook/critical', methods=['POST'])
def critical_webhook():
    """Receive critical-route alerts and log them at CRITICAL level.

    Extend this handler with paging/escalation logic. Returns a 200 JSON
    ack so Alertmanager records the delivery as successful.
    """
    # silent=True avoids a 500 on a malformed or non-JSON request body.
    data = request.get_json(silent=True) or {}
    logging.critical(f"CRITICAL ALERT: {json.dumps(data, indent=2)}")
    # Add your critical alert handling logic here
    # Example: send to PagerDuty, call on-call engineer, etc.
    return jsonify({"status": "critical_received"}), 200
@app.route('/webhook/logs', methods=['POST'])
def log_webhook():
    """Receive LokiLogError-route alerts and log them at WARNING level.

    Returns a 200 JSON ack so Alertmanager records the delivery.
    """
    # silent=True avoids a 500 on a malformed or non-JSON request body.
    data = request.get_json(silent=True) or {}
    logging.warning(f"Log error alert: {json.dumps(data, indent=2)}")
    return jsonify({"status": "log_received"}), 200
if __name__ == '__main__':
    # NOTE(review): 9093 is also Alertmanager's default listen port. This only
    # works when the receiver runs on a different host than Alertmanager (as in
    # this guide, 203.0.113.10); co-located deployments must change the port
    # here and in every webhook URL.
    app.run(host='0.0.0.0', port=9093, debug=False)
Install webhook receiver dependencies
Install required Python packages for the webhook receiver.
sudo apt install -y python3-pip
pip3 install flask gunicorn
Set up Grafana alert integration
Create Grafana alert rules
Configure Grafana to create alerts based on Loki log queries and route them through Alertmanager.
# Navigate to Grafana UI: http://localhost:3000
Go to Alerting > Alert Rules > New Rule
Query A - Log Error Count
Data source: Loki
Query: sum(count_over_time({job="nginx"} |= "error" [5m]))
Legend: error_count
Query B - Threshold
Expression: $A > 10
Evaluation:
Condition: B
Evaluate every: 1m
Evaluate for: 2m
Labels:
severity: warning
service: nginx
team: infrastructure
Configure Grafana notification policy
Set up notification policies in Grafana to route alerts to Alertmanager.
# In Grafana UI:
Go to Alerting > Notification policies
Edit default policy:
Default contact point: alertmanager
Group by: [alertname, grafana_folder]
Group wait: 10s
Group interval: 5m
Repeat interval: 12h
Specific routes:
severity = critical
Contact point: alertmanager-critical
Group wait: 0s
Repeat interval: 30m
team = infrastructure
Contact point: alertmanager-infra
Override grouping: [service]
Advanced webhook templating
Create advanced webhook templates
Set up sophisticated alert templates with rich formatting and conditional logic.
{{ define "webhook.title" }}
{{ if eq .Status "firing" }}🔥 FIRING{{ else }}✅ RESOLVED{{ end }}:
{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}
{{ end }}
{{ define "webhook.message" }}
{{ if eq .Status "firing" }}
⚠️ ALERT FIRING ⚠️
{{ else }}
✅ ALERT RESOLVED ✅
{{ end }}
Summary: {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}
Details:
{{ range .Alerts }}
• Alert: {{ .Labels.alertname }}
• Instance: {{ .Labels.instance | default "N/A" }}
• Severity: {{ .Labels.severity | default "unknown" }}
• Service: {{ .Labels.service | default "N/A" }}
{{ if .Annotations.description }}• Description: {{ .Annotations.description }}{{ end }}
{{ if .Annotations.runbook_url }}• Runbook: {{ .Annotations.runbook_url }}{{ end }}
• Started: {{ .StartsAt.Format "2006-01-02 15:04:05 UTC" }}
{{ if ne .Status "firing" }}• Resolved: {{ .EndsAt.Format "2006-01-02 15:04:05 UTC" }}{{ end }}
---
{{ end }}
Alert Dashboard: http://localhost:3000/alerting/list
{{ end }}
Update Alertmanager configuration with templates
Modify the Alertmanager configuration to use custom templates.
# Add to global section:
global:
smtp_smarthost: 'localhost:587'
smtp_from: 'alertmanager@example.com'
resolve_timeout: 5m
Add templates section:
templates:
- '/etc/alertmanager/templates/*.tmpl'
Note that custom templates apply to messaging receivers (Slack, email, etc.) — webhook receivers always get Alertmanager's fixed JSON payload, so keep webhook_configs minimal:
# NOTE: webhook_configs cannot render custom templates -- Alertmanager's strict
# config parser rejects title/text keys there, and the webhook payload is a
# fixed JSON document. The templates defined above apply to messaging
# receivers (slack_configs, email_configs, etc.); a webhook endpoint should
# format its own message from the JSON it receives.
receivers:
  - name: 'default-webhook'
    webhook_configs:
      - url: 'http://203.0.113.10:9093/webhook/default'
        send_resolved: true
        http_config:
          basic_auth:
            username: 'webhook_user'
            password: 'secure_webhook_password'
Verify your setup
# Check Alertmanager status
sudo systemctl status alertmanager
Verify Alertmanager is listening (ss ships with iproute2; the legacy netstat requires the net-tools package)
sudo ss -tlnp | grep :9093
Check Loki ruler is running
curl http://localhost:3100/ruler/api/v1/rules
Test webhook endpoint
curl -X POST http://203.0.113.10:9093/webhook/default \
-H "Content-Type: application/json" \
-d '{"alerts":[{"status":"firing","labels":{"alertname":"test","severity":"warning"},"annotations":{"summary":"Test alert"}}]}'
Verify Alertmanager configuration
sudo /usr/local/bin/amtool config show --alertmanager.url=http://localhost:9093
Check active alerts
sudo /usr/local/bin/amtool alert --alertmanager.url=http://localhost:9093
Test alert generation
# Generate test log entries to trigger alerts
echo "$(date) ERROR: Test error message for alerting" | sudo tee -a /var/log/nginx/error.log
Generate multiple errors to exceed threshold
for i in {1..15}; do
echo "$(date) ERROR: Test error $i for rate limit alert" | sudo tee -a /var/log/nginx/error.log
sleep 1
done
Check if Loki received the logs
curl -G -s "http://localhost:3100/loki/api/v1/query" \
--data-urlencode 'query={job="nginx"} |= "ERROR"' \
--data-urlencode 'limit=10' | jq .
Confirm the ruler loaded the rule group (Loki's ruler has no force-evaluation endpoint; rules are evaluated on the ruler's own schedule)
curl http://localhost:3100/prometheus/api/v1/rules
Monitor and troubleshoot
# View Alertmanager logs
sudo journalctl -u alertmanager -f
Check Loki ruler logs
sudo journalctl -u loki -f | grep ruler
View active silences
sudo /usr/local/bin/amtool silence query --alertmanager.url=http://localhost:9093
Create temporary silence for testing
sudo /usr/local/bin/amtool silence add alertname=HighErrorRate --comment="Testing" --duration=1h --alertmanager.url=http://localhost:9093
Check webhook receiver logs (if running the Python script)
ps aux | grep webhook-receiver
tail -f /var/log/webhook-receiver.log
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Alerts not firing | Loki ruler not evaluating rules | Check /etc/loki/config.yml ruler configuration and restart Loki |
| Webhook not receiving alerts | Network connectivity or authentication | Test webhook URL manually with curl and check firewall rules |
| Alertmanager config invalid | YAML syntax or validation errors | Run amtool config check /etc/alertmanager/alertmanager.yml |
| High memory usage | Too many active alerts in memory | Adjust resolve_timeout and implement alert silencing policies |
| Alerts not resolving | LogQL query always returns true | Review LogQL expressions and add proper resolution conditions |
| Template rendering errors | Invalid template syntax | Check Alertmanager logs and validate template syntax |
Next steps
- Configure Prometheus Alertmanager for email and Slack notifications
- Setup Grafana alerting with Slack and Microsoft Teams integration
- Configure Loki log retention and archiving policies
- Implement Alertmanager high availability clustering
- Setup PagerDuty integration with Alertmanager webhooks
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Automated installer for Prometheus Alertmanager with webhook receivers.
# -e: abort on any command failure; -u: undefined variables are errors;
# -o pipefail: a pipeline fails if any stage fails.
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Configuration
# Positional overrides: $1 webhook base URL, $2 basic-auth user, $3 password.
ALERTMANAGER_VERSION="0.26.0"
WEBHOOK_URL="${1:-http://203.0.113.10:9093/webhook}"
WEBHOOK_USER="${2:-webhook_user}"
WEBHOOK_PASS="${3:-secure_webhook_password}"
# Usage function
# Prints invocation help and exits non-zero.
# NOTE(review): defined but never invoked anywhere in this script -- consider
# calling it for -h/--help or on invalid arguments.
usage() {
echo "Usage: $0 [webhook_url] [webhook_user] [webhook_password]"
echo "Example: $0 http://192.168.1.100:9093/webhook myuser mypass"
exit 1
}
# Cleanup function
# Rolls back a partial install when any command fails (see trap below).
# Each step is best-effort (2>/dev/null / || true) so cleanup itself cannot
# abort under set -e.
cleanup() {
echo -e "${RED}[ERROR] Installation failed. Cleaning up...${NC}"
systemctl stop alertmanager 2>/dev/null || true
systemctl disable alertmanager 2>/dev/null || true
rm -f /etc/systemd/system/alertmanager.service
rm -rf /tmp/alertmanager-*
# NOTE(review): this also removes an 'alertmanager' user that existed before
# the script ran -- confirm that is acceptable in your environment.
userdel alertmanager 2>/dev/null || true
exit 1
}
# With set -e, any failing command raises ERR and triggers cleanup.
trap cleanup ERR
# Check prerequisites
# Verifies the script is running as root (it writes to /etc and
# /usr/local/bin and manages systemd) and that wget is available to
# download the Alertmanager release tarball.
check_prerequisites() {
    echo -e "${BLUE}[1/8] Checking prerequisites...${NC}"
    if [[ $EUID -ne 0 ]]; then
        echo -e "${RED}This script must be run as root${NC}"
        exit 1
    fi
    if ! command -v wget > /dev/null 2>&1; then
        echo -e "${RED}wget is required but not installed${NC}"
        exit 1
    fi
    echo -e "${GREEN}Prerequisites check passed${NC}"
}
# Detect distribution
# Sources /etc/os-release and maps $ID to package-manager commands.
# Sets globals: PKG_MGR, PKG_UPDATE, PKG_INSTALL, FIREWALL_CMD.
# NOTE(review): FIREWALL_CMD is assigned but never read -- configure_firewall
# switches on PKG_MGR instead; consider removing it or using it there.
detect_distro() {
echo -e "${BLUE}[2/8] Detecting distribution...${NC}"
if [ -f /etc/os-release ]; then
# Brings $ID and $PRETTY_NAME into the environment.
. /etc/os-release
case "$ID" in
ubuntu|debian)
PKG_MGR="apt"
PKG_UPDATE="apt update"
PKG_INSTALL="apt install -y"
FIREWALL_CMD="ufw"
;;
almalinux|rocky|centos|rhel|ol|fedora)
PKG_MGR="dnf"
PKG_UPDATE="dnf update -y"
PKG_INSTALL="dnf install -y"
FIREWALL_CMD="firewall-cmd"
;;
amzn)
# Amazon Linux still defaults to yum.
PKG_MGR="yum"
PKG_UPDATE="yum update -y"
PKG_INSTALL="yum install -y"
FIREWALL_CMD="firewall-cmd"
;;
*)
echo -e "${RED}Unsupported distribution: $ID${NC}"
exit 1
;;
esac
else
echo -e "${RED}Cannot detect distribution${NC}"
exit 1
fi
echo -e "${GREEN}Detected $PRETTY_NAME using $PKG_MGR${NC}"
}
# Update system and install dependencies
# Refreshes the package index and installs curl (health checks) and tar
# (archive extraction) using the commands chosen by detect_distro.
install_dependencies() {
    echo -e "${BLUE}[3/8] Installing dependencies...${NC}"
    # Deliberately unquoted: these variables hold multi-word commands.
    ${PKG_UPDATE}
    ${PKG_INSTALL} curl tar
    echo -e "${GREEN}Dependencies installed${NC}"
}
# Download and install Alertmanager
# Fetches the pinned release tarball, unpacks it in /tmp, and installs the
# alertmanager and amtool binaries into /usr/local/bin.
install_alertmanager() {
    echo -e "${BLUE}[4/8] Installing Alertmanager...${NC}"
    local dist="alertmanager-${ALERTMANAGER_VERSION}.linux-amd64"
    cd /tmp
    wget -q "https://github.com/prometheus/alertmanager/releases/download/v${ALERTMANAGER_VERSION}/${dist}.tar.gz"
    tar xzf "${dist}.tar.gz"
    mv "${dist}/alertmanager" "${dist}/amtool" /usr/local/bin/
    chmod 755 /usr/local/bin/alertmanager /usr/local/bin/amtool
    # Remove the tarball and extracted directory.
    rm -rf /tmp/alertmanager-*
    echo -e "${GREEN}Alertmanager binaries installed${NC}"
}
# Create user and directories
# Creates the dedicated service account (idempotent) and the config/data
# directories owned by it with mode 755.
create_user_directories() {
    echo -e "${BLUE}[5/8] Creating user and directories...${NC}"
    # System-style account: no home directory, no login shell.
    useradd --no-create-home --shell /bin/false alertmanager || true
    install -d -o alertmanager -g alertmanager -m 755 /etc/alertmanager /var/lib/alertmanager
    echo -e "${GREEN}User and directories created${NC}"
}
# Create configuration
# Writes /etc/alertmanager/alertmanager.yml with webhook receivers built from
# the script parameters. The heredoc delimiter is deliberately unquoted so
# ${WEBHOOK_URL}/${WEBHOOK_USER}/${WEBHOOK_PASS} expand.
#
# NOTE: webhook_configs must not contain title/text keys -- Alertmanager
# parses its configuration strictly and refuses to start on unknown fields
# (title/text belong to slack_configs; webhook payloads are fixed JSON).
create_configuration() {
    echo -e "${BLUE}[6/8] Creating configuration...${NC}"
    cat > /etc/alertmanager/alertmanager.yml << EOF
global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alertmanager@example.com'
  resolve_timeout: 5m

route:
  group_by: ['alertname', 'cluster', 'service']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'default-webhook'
  routes:
    - match:
        severity: critical
      receiver: 'critical-webhook'
      group_wait: 5s
      repeat_interval: 30m
    - match:
        alertname: LokiLogError
      receiver: 'log-error-webhook'
      group_wait: 10s
      repeat_interval: 15m
    - match_re:
        service: ^(nginx|apache)$
      receiver: 'webserver-webhook'

receivers:
  - name: 'default-webhook'
    webhook_configs:
      - url: '${WEBHOOK_URL}/default'
        send_resolved: true
        http_config:
          basic_auth:
            username: '${WEBHOOK_USER}'
            password: '${WEBHOOK_PASS}'
  - name: 'critical-webhook'
    webhook_configs:
      - url: '${WEBHOOK_URL}/critical'
        send_resolved: true
        http_config:
          basic_auth:
            username: '${WEBHOOK_USER}'
            password: '${WEBHOOK_PASS}'
  - name: 'log-error-webhook'
    webhook_configs:
      - url: '${WEBHOOK_URL}/logs'
        send_resolved: true
  - name: 'webserver-webhook'
    webhook_configs:
      - url: '${WEBHOOK_URL}/webserver'
        send_resolved: true
EOF
    chown alertmanager:alertmanager /etc/alertmanager/alertmanager.yml
    # 640, not 644: the file embeds webhook credentials (matches the manual
    # setup step earlier in this guide).
    chmod 640 /etc/alertmanager/alertmanager.yml
    echo -e "${GREEN}Configuration created${NC}"
}
# Create systemd service
# Writes the alertmanager unit file, reloads systemd, and starts the service.
# The heredoc delimiter is quoted ('EOF') so the backslash line continuations
# survive into the unit file -- in an unquoted heredoc the shell strips
# \<newline>, collapsing ExecStart onto one line. Restart settings match the
# manually-installed unit shown earlier in this guide.
create_systemd_service() {
    echo -e "${BLUE}[7/8] Creating systemd service...${NC}"
    cat > /etc/systemd/system/alertmanager.service << 'EOF'
[Unit]
Description=Prometheus Alertmanager
Wants=network-online.target
After=network-online.target

[Service]
User=alertmanager
Group=alertmanager
Type=simple
ExecStart=/usr/local/bin/alertmanager \
  --config.file /etc/alertmanager/alertmanager.yml \
  --storage.path /var/lib/alertmanager/ \
  --web.external-url=http://localhost:9093/
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF
    systemctl daemon-reload
    systemctl enable alertmanager
    systemctl start alertmanager
    echo -e "${GREEN}Systemd service created and started${NC}"
}
# Configure firewall
# Opens TCP 9093 (Alertmanager web/API) via whichever firewall front-end the
# detected distribution uses; silently skips if no firewall tool is present.
configure_firewall() {
    echo -e "${BLUE}[8/8] Configuring firewall...${NC}"
    if [[ "$PKG_MGR" == "apt" ]]; then
        if command -v ufw &> /dev/null; then
            ufw allow 9093/tcp
            echo -e "${GREEN}UFW rule added for port 9093${NC}"
        fi
    elif [[ "$PKG_MGR" == "dnf" || "$PKG_MGR" == "yum" ]]; then
        # Only touch firewalld when it is installed AND actively running.
        if command -v firewall-cmd &> /dev/null && systemctl is-active --quiet firewalld; then
            firewall-cmd --permanent --add-port=9093/tcp
            firewall-cmd --reload
            echo -e "${GREEN}Firewalld rule added for port 9093${NC}"
        fi
    fi
}
# Verify installation
# Confirms the systemd unit is active (hard failure if not) and probes the
# /-/healthy endpoint (soft warning -- the daemon may still be starting).
verify_installation() {
    echo -e "${BLUE}Verifying installation...${NC}"
    if ! systemctl is-active --quiet alertmanager; then
        echo -e "${RED}✗ Alertmanager service is not running${NC}"
        return 1
    fi
    echo -e "${GREEN}✓ Alertmanager service is running${NC}"
    if curl -s http://localhost:9093/-/healthy > /dev/null; then
        echo -e "${GREEN}✓ Alertmanager is responding on port 9093${NC}"
    else
        echo -e "${YELLOW}⚠ Alertmanager may not be responding (this might be normal if starting)${NC}"
    fi
    local primary_ip
    primary_ip="$(hostname -I | awk '{print $1}')"
    echo -e "${GREEN}Alertmanager installation completed successfully!${NC}"
    echo -e "${BLUE}Access Alertmanager at: http://${primary_ip}:9093${NC}"
    echo -e "${BLUE}Configuration file: /etc/alertmanager/alertmanager.yml${NC}"
}
# Main execution
# Runs the installation phases in dependency order; with set -e any failure
# triggers the ERR trap (cleanup) defined above.
main() {
check_prerequisites
detect_distro
install_dependencies
install_alertmanager
create_user_directories
create_configuration
create_systemd_service
configure_firewall
verify_installation
}
main "$@"
Review the script before running. Execute with: bash install.sh