Set up comprehensive system monitoring using collectd to collect performance metrics and InfluxDB 1.8 as a time-series database backend. This tutorial covers installation, configuration, and retention policies for production monitoring environments.
Prerequisites
- Root or sudo access
- At least 2GB RAM
- 10GB available disk space
What this solves
Modern infrastructure requires continuous monitoring of system performance metrics like CPU usage, memory consumption, disk I/O, and network statistics. Collectd provides a lightweight daemon that collects these metrics with minimal system overhead, while InfluxDB 1.8 offers a robust time-series database for storing and querying performance data.
This monitoring stack enables you to track system trends, identify performance bottlenecks, and set up alerting for critical thresholds. Unlike simple monitoring solutions, this setup provides granular metrics collection with configurable retention policies and efficient data compression.
Step-by-step installation
Update system packages
Start by updating your package manager to ensure you get the latest versions and security patches.
sudo apt update && sudo apt upgrade -y
Install InfluxDB 1.8
Install InfluxDB 1.8 from the official repository. We use version 1.8 specifically for its mature feature set and stability.
wget -qO- https://repos.influxdata.com/influxdb.key | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/influxdb.gpg > /dev/null
echo "deb [signed-by=/etc/apt/trusted.gpg.d/influxdb.gpg] https://repos.influxdata.com/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/influxdb.list
sudo apt update
sudo apt install -y influxdb
Configure InfluxDB
Modify the InfluxDB configuration file (/etc/influxdb/influxdb.conf) to optimize for metrics storage and enable the collectd input plugin.
[meta]
dir = "/var/lib/influxdb/meta"
[data]
dir = "/var/lib/influxdb/data"
wal-dir = "/var/lib/influxdb/wal"
series-id-set-cache-size = 100
[coordinator]
write-timeout = "10s"
max-concurrent-queries = 0
query-timeout = "0s"
log-queries-after = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
[retention]
enabled = true
check-interval = "30m"
[shard-precreation]
enabled = true
check-interval = "10m"
advance-period = "30m"
[monitor]
store-enabled = true
store-database = "_internal"
store-interval = "10s"
[subscriber]
enabled = true
http-timeout = "30s"
[http]
enabled = true
bind-address = ":8086"
auth-enabled = false
log-enabled = true
write-tracing = false
pprof-enabled = true
https-enabled = false
max-row-limit = 0
max-connection-limit = 0
shared-secret = ""
realm = "InfluxDB"
[[collectd]]
enabled = true
bind-address = ":25826"
database = "collectd"
retention-policy = ""
batch-size = 5000
batch-pending = 10
batch-timeout = "10s"
read-buffer = 0
typesdb = "/usr/share/collectd/types.db"
Start and enable InfluxDB
Enable InfluxDB to start automatically on boot and start the service now.
sudo systemctl enable --now influxdb
sudo systemctl status influxdb
Create InfluxDB database and retention policies
Create a dedicated database for collectd metrics with appropriate retention policies for different data granularities.
influx -execute "CREATE DATABASE collectd"
influx -execute "CREATE RETENTION POLICY \"rp_1h\" ON \"collectd\" DURATION 7d REPLICATION 1"
influx -execute "CREATE RETENTION POLICY \"rp_1d\" ON \"collectd\" DURATION 30d REPLICATION 1"
influx -execute "CREATE RETENTION POLICY \"rp_1w\" ON \"collectd\" DURATION 365d REPLICATION 1 DEFAULT"
influx -execute "SHOW RETENTION POLICIES ON collectd"
Install collectd
Install collectd daemon and common plugins for system monitoring.
sudo apt install -y collectd collectd-utils
Configure collectd main settings
Create a comprehensive collectd configuration that enables essential system monitoring plugins and configures network output to InfluxDB.
Hostname "$(hostname)"
FQDNLookup true
BaseDir "/var/lib/collectd"
PluginDir "/usr/lib/collectd"
TypesDB "/usr/share/collectd/types.db"
AutoLoadPlugin false
CollectInternalStats false
Interval 10
MaxReadInterval 86400
Timeout 2
ReadThreads 5
WriteThreads 5
# Logging
LoadPlugin syslog
<Plugin syslog>
  LogLevel info
</Plugin>

# System monitoring plugins
LoadPlugin cpu
LoadPlugin df
LoadPlugin disk
LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin processes
LoadPlugin swap
LoadPlugin uptime
LoadPlugin users

# CPU plugin configuration
<Plugin cpu>
  ReportByCpu true
  ReportByState true
  ValuesPercentage true
</Plugin>

# Disk space monitoring
<Plugin df>
  MountPoint "/"
  MountPoint "/var"
  MountPoint "/tmp"
  FSType "ext4"
  FSType "xfs"
  IgnoreSelected false
  ReportByDevice false
  ReportReserved true
  ReportInodes true
  ValuesAbsolute true
  ValuesPercentage true
</Plugin>

# Disk I/O monitoring
<Plugin disk>
  Disk "/^[hsv]d[a-z]/"
  IgnoreSelected false
  UdevNameAttr "DEVNAME"
</Plugin>

# Network interface monitoring
<Plugin interface>
  Interface "eth0"
  Interface "ens3"
  Interface "lo"
  IgnoreSelected false
</Plugin>

# Memory detailed monitoring
<Plugin memory>
  ValuesAbsolute true
  ValuesPercentage true
</Plugin>

# Process monitoring
<Plugin processes>
  Process "collectd"
  Process "influxdb"
  Process "sshd"
  Process "nginx"
  Process "apache2"
  ProcessMatch "java" "java.*"
</Plugin>

# Network plugin for sending to InfluxDB
LoadPlugin network
<Plugin network>
  <Server "127.0.0.1" "25826">
    SecurityLevel None
    Interface "lo"
  </Server>
  TimeToLive 128
  MaxPacketSize 1452
  Forward false
  CacheFlush 1800
  ReportStats true
</Plugin>
Configure collectd additional plugins
Enable additional monitoring plugins for more comprehensive system visibility.
# TCP connection monitoring
LoadPlugin tcpconns
<Plugin tcpconns>
  ListeningPorts true
  AllPortsSummary true
  LocalPort "22"
  LocalPort "80"
  LocalPort "443"
  LocalPort "8086"
</Plugin>

# Context switch monitoring
LoadPlugin contextswitch

# Entropy monitoring
LoadPlugin entropy

# IRQ monitoring
LoadPlugin irq
<Plugin irq>
  Irq 0
  Irq 1
  Irq 8
  IgnoreSelected false
</Plugin>

# Thermal monitoring
LoadPlugin thermal
<Plugin thermal>
  ForceUseProcfs false
  Device "thermal_zone0"
  Device "thermal_zone1"
  IgnoreSelected false
</Plugin>

# System statistics
LoadPlugin vmem
<Plugin vmem>
  Verbose true
</Plugin>
File descriptor monitoring
LoadPlugin filecount
Instance "proc-fd"
Name "fd"
Recursive true
Set proper file permissions
Configure correct ownership and permissions for collectd directories and files. The collectd daemon needs read access to system files and write access to its data directory.
sudo chown -R collectd:collectd /var/lib/collectd
sudo chmod 755 /var/lib/collectd
sudo chmod 644 /etc/collectd/collectd.conf
sudo chmod 644 /etc/collectd/collectd.conf.d/additional.conf
Enable and start collectd
Start the collectd service and enable it to start automatically on system boot.
sudo systemctl enable --now collectd
sudo systemctl status collectd
Configure firewall rules
Open necessary ports for InfluxDB HTTP API and collectd network communication.
sudo ufw allow 8086/tcp comment "InfluxDB HTTP API"
sudo ufw allow from 127.0.0.1 to any port 25826 comment "collectd to InfluxDB"
sudo ufw reload
sudo ufw status
Verify your setup
Test that collectd is successfully sending metrics to InfluxDB and that data is being stored correctly.
# Check service status
sudo systemctl status influxdb collectd
# Verify InfluxDB is receiving data
influx -execute "SHOW DATABASES"
influx -execute "USE collectd; SHOW MEASUREMENTS LIMIT 10"
# Check recent CPU metrics
influx -execute "USE collectd; SELECT mean(value) FROM cpu_value WHERE time > now() - 5m GROUP BY time(1m), host, instance"
# Monitor collectd logs
sudo journalctl -u collectd -f --lines=20
# Check collectd network statistics
sudo collectdctl listval | grep network
Configure retention and continuous queries
Set up data downsampling
Create continuous queries to automatically downsample high-resolution data into lower-resolution aggregates for long-term storage.
# Create continuous query for hourly aggregates
influx -execute "USE collectd; CREATE CONTINUOUS QUERY cq_1h ON collectd BEGIN SELECT mean(value) as value INTO collectd.rp_1h.:MEASUREMENT FROM /.*/ GROUP BY time(1h), * END"
# Create continuous query for daily aggregates
influx -execute "USE collectd; CREATE CONTINUOUS QUERY cq_1d ON collectd BEGIN SELECT mean(value) as value INTO collectd.rp_1d.:MEASUREMENT FROM collectd.rp_1h./.*/ GROUP BY time(1d), * END"
# List continuous queries
influx -execute "SHOW CONTINUOUS QUERIES"
Monitor data retention
Set up monitoring for retention policy enforcement and data cleanup.
# Check shard information
influx -execute "USE collectd; SHOW SHARDS"
# Monitor database size
influx -execute "USE collectd; SELECT sum(diskBytes) FROM _internal..tsm1_filestore WHERE time > now() - 1h GROUP BY time(10m)"
# Verify retention policy application
influx -execute "USE collectd; SHOW RETENTION POLICIES"
Performance optimization
Tune InfluxDB for metrics workload
Optimize InfluxDB configuration for high-throughput metrics collection.
# Add these optimizations to the existing config
[data]
# Increase cache sizes for better write performance
cache-max-memory-size = "1g"
cache-snapshot-memory-size = "25m"
cache-snapshot-write-cold-duration = "10m"
# Optimize compaction
compact-full-write-cold-duration = "4h"
compact-throughput = "48m"
compact-throughput-burst = "48m"
# TSM engine optimizations
tsm-use-madv-willneed = true
[coordinator]
# Optimize write performance
write-timeout = "30s"
max-concurrent-queries = 0
[http]
# Increase connection limits
max-connection-limit = 0
max-enqueued-write-limit = 0
enqueued-write-timeout = "30s"
Optimize collectd performance
Fine-tune collectd for minimal system impact while maintaining comprehensive monitoring.
# Performance optimizations
WriteThreads 8
WriteQueueLimitHigh 1000000
WriteQueueLimitLow 800000
Network plugin optimizations
SecurityLevel None
Interface "lo"
TimeToLive 128
MaxPacketSize 1452
Forward false
CacheFlush 1800
ReportStats false
Reduce disk plugin overhead
Disk "/^[hsv]d[a-z]/"
IgnoreSelected false
UdevNameAttr "DEVNAME"
UseBSDName false
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| collectd fails to start | Configuration syntax error | sudo collectd -t -C /etc/collectd/collectd.conf to test config |
| No data in InfluxDB | Network plugin not configured | Check network plugin config and port 25826 connectivity |
| InfluxDB connection refused | Service not running or firewall blocking | sudo systemctl status influxdb and check firewall rules |
| High CPU usage from collectd | Too many plugins or short interval | Increase interval to 30s and disable unused plugins |
| Permission denied errors | Wrong file ownership | sudo chown -R collectd:collectd /var/lib/collectd |
| Missing measurements in InfluxDB | Types.db file missing | Install collectd-core package or verify TypesDB path |
| Data not being retained properly | Retention policies not applied | Check retention policies and continuous queries are running |
Next steps
- Install and configure Grafana with Prometheus for system monitoring to visualize your metrics
- Set up Prometheus and Grafana monitoring stack with Docker compose for container-based monitoring
- Configure Grafana dashboards for InfluxDB collectd metrics to create custom visualizations
- Set up InfluxDB alerting with Kapacitor and notifications for automated monitoring alerts
- Optimize Linux system performance with kernel parameters and system tuning to improve the monitored system
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
#
# Installs collectd and InfluxDB 1.8 and wires them together for host
# performance monitoring. Options are described by usage().
set -euo pipefail

# ANSI colour escapes; they are interpreted by `echo -e` at the call sites.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Global variables
SCRIPT_NAME=$(basename -- "$0")
readonly SCRIPT_NAME
INFLUXDB_USER="collectd"
INFLUXDB_DB="collectd"

# Default password; may be overridden with -p.
# Hex output is used instead of base64 because the password is later embedded
# in a URL query string and in an InfluxQL string literal: base64 can emit
# '+', '/' and '=', which are not safe there without extra escaping.
# If openssl is missing the value is left empty so that the prerequisite check
# can report the problem with a proper message instead of the script dying
# here on "command not found".
if command -v openssl > /dev/null 2>&1; then
  INFLUXDB_PASS=$(openssl rand -hex 16)
else
  INFLUXDB_PASS=""
fi
#######################################
# Print the command-line help and terminate the script.
# Globals:   SCRIPT_NAME (read)
# Outputs:   help text on stdout
# Returns:   does not return; always exits with status 1
#######################################
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [OPTIONS]" \
    "Configure Linux performance monitoring with collectd and InfluxDB 1.8" \
    "" \
    "Options:" \
    " -h, --help Show this help message" \
    " -p PASSWORD Set InfluxDB password (default: auto-generated)"
  exit 1
}
#######################################
# ERR handler: roll back a failed installation by stopping/disabling the
# services and removing the collectd and influxdb packages.
# Globals:   PKG_MGR (read, may be unset), RED, YELLOW, NC (read)
# Outputs:   progress messages on stdout
# Returns:   does not return; always exits with status 1
#######################################
cleanup() {
  # With errtrace the handler is also inherited by command substitutions.
  # A failure there must only fail that subshell; running the rollback from
  # inside it would uninstall packages while the parent shell keeps going.
  if ((BASH_SUBSHELL > 0)); then
    exit 1
  fi

  # Do not re-enter the handler if one of the rollback commands fails.
  trap - ERR

  # PKG_MGR is only set once the distribution has been detected; default it
  # so that `set -u` cannot abort the rollback half-way.
  local pkg_mgr="${PKG_MGR:-}"

  echo -e "${RED}[ERROR]${NC} Installation failed. Rolling back changes..."
  # Best effort: the units may not exist yet, so errors are ignored on purpose.
  systemctl stop collectd influxdb 2>/dev/null || true
  systemctl disable collectd influxdb 2>/dev/null || true

  case "$pkg_mgr" in
    apt | dnf | yum)
      # Call the package manager directly. Reusing the install command here
      # would expand to "<mgr> install -y remove ..." and install packages
      # instead of removing them.
      "$pkg_mgr" remove -y collectd influxdb 2>/dev/null || true
      ;;
    *)
      echo -e "${YELLOW}[WARNING]${NC} Package manager unknown; skipping package removal"
      ;;
  esac

  echo -e "${YELLOW}[WARNING]${NC} Rollback completed"
  exit 1
}

# An ERR trap is not inherited by shell functions unless errtrace is enabled.
# All work in this script happens inside functions, so without it the
# rollback handler would never run.
set -o errtrace
trap cleanup ERR
#######################################
# Abort early unless the script can do its job: it must run as root, a
# downloader (curl or wget) must be present, and openssl must be present.
# Globals:   EUID, RED, YELLOW, NC (read)
# Outputs:   status / error messages on stdout
# Returns:   0 when every check passes; exits with status 1 otherwise
#######################################
check_prerequisites() {
  echo -e "${YELLOW}[INFO]${NC} Checking prerequisites..."

  # The first failing check wins; checks are evaluated in the order below.
  local problem=""
  if ((EUID != 0)); then
    problem="This script must be run as root or with sudo"
  elif ! { command -v curl &> /dev/null || command -v wget &> /dev/null; }; then
    problem="Either curl or wget is required"
  elif ! command -v openssl &> /dev/null; then
    problem="OpenSSL is required for password generation"
  fi

  if [[ -n "$problem" ]]; then
    echo -e "${RED}[ERROR]${NC} ${problem}"
    exit 1
  fi
}
#######################################
# Identify the distribution and select the matching package-manager commands
# and collectd configuration path.
# Globals:   PKG_MGR, PKG_INSTALL, PKG_UPDATE, PKG_UPGRADE, COLLECTD_CONFIG
#            (written); every variable defined by the os-release file is
#            also set in the global scope, because the file is sourced.
# Arguments: $1 - os-release file to read (optional, default /etc/os-release)
# Outputs:   status / error messages on stdout
# Returns:   0 on success; exits with status 1 when the file is missing or
#            the distribution is not supported
#######################################
detect_distro() {
  local os_release="${1:-/etc/os-release}"

  if [[ ! -f "$os_release" ]]; then
    echo -e "${RED}[ERROR]${NC} Cannot detect distribution"
    exit 1
  fi

  # shellcheck source=/dev/null
  . "$os_release"

  # ID and PRETTY_NAME are not guaranteed to be present in the file; default
  # them so that `set -u` reports a clear message instead of aborting on an
  # unbound variable.
  local distro_id="${ID:-unknown}"
  local distro_name="${PRETTY_NAME:-$distro_id}"

  case "$distro_id" in
    ubuntu | debian)
      PKG_MGR="apt"
      PKG_INSTALL="apt install -y"
      PKG_UPDATE="apt update"
      PKG_UPGRADE="apt upgrade -y"
      COLLECTD_CONFIG="/etc/collectd/collectd.conf"
      ;;
    almalinux | rocky | centos | rhel | ol | fedora)
      # Older releases of this family (e.g. EL7) only provide yum.
      if command -v dnf &> /dev/null; then
        PKG_MGR="dnf"
      else
        PKG_MGR="yum"
      fi
      PKG_INSTALL="$PKG_MGR install -y"
      PKG_UPDATE="$PKG_MGR update -y"
      PKG_UPGRADE="$PKG_MGR upgrade -y"
      COLLECTD_CONFIG="/etc/collectd.conf"
      ;;
    amzn)
      PKG_MGR="yum"
      PKG_INSTALL="yum install -y"
      PKG_UPDATE="yum update -y"
      PKG_UPGRADE="yum upgrade -y"
      COLLECTD_CONFIG="/etc/collectd.conf"
      ;;
    *)
      echo -e "${RED}[ERROR]${NC} Unsupported distribution: $distro_id"
      exit 1
      ;;
  esac

  echo -e "${GREEN}[SUCCESS]${NC} Detected $distro_name"
}
#######################################
# Parse the command-line options.
# Globals:   INFLUXDB_PASS (written by -p), RED, NC (read)
# Arguments: the script's positional parameters
# Outputs:   help text or error messages on stdout
# Returns:   0 when all options were consumed; exits 0 after printing help;
#            exits non-zero (via usage) on an unknown option or when -p has
#            no usable value
#######################################
parse_args() {
  while (($# > 0)); do
    case "$1" in
      -h | --help)
        # usage terminates with a failure status; run it in a subshell so
        # that explicitly asking for help is reported as success.
        (usage) || true
        exit 0
        ;;
      -p)
        # Check the argument count first: under `set -u` touching a missing
        # $2 would abort with a cryptic "unbound variable" error.
        if (($# < 2)) || [[ -z "$2" ]]; then
          echo -e "${RED}[ERROR]${NC} Option -p requires a non-empty PASSWORD argument"
          usage
        fi
        INFLUXDB_PASS="$2"
        shift 2
        ;;
      *)
        echo -e "${RED}[ERROR]${NC} Unknown option: $1"
        usage
        ;;
    esac
  done
}
#######################################
# Refresh the package index and upgrade installed packages (step 1 of 6).
# Globals:   PKG_UPDATE, PKG_UPGRADE, GREEN, NC (read)
# Outputs:   progress messages plus the package manager's own output
#######################################
update_system() {
  echo "[1/6] Updating system packages..."

  local pkg_cmd
  for pkg_cmd in "$PKG_UPDATE" "$PKG_UPGRADE"; do
    # Deliberately unquoted: each value holds a command together with its
    # arguments and relies on word splitting to be executed.
    $pkg_cmd
  done

  echo -e "${GREEN}[SUCCESS]${NC} System packages updated"
}
#######################################
# Register the InfluxData package repository, install InfluxDB and start it
# (step 2 of 6).
# Globals:   PKG_MGR, PKG_INSTALL, GREEN, RED, NC (read);
#            ID, VERSION_CODENAME (read when set, e.g. from os-release)
# Outputs:   progress messages plus the package manager's own output
# Returns:   0 once InfluxDB answers queries; 1 if it does not come up
#            within 30 seconds
#######################################
install_influxdb() {
  echo "[2/6] Installing InfluxDB 1.8..."

  if [[ "$PKG_MGR" == "apt" ]]; then
    # NOTE(review): InfluxData has rotated its repository signing key in the
    # past; confirm this key URL is still the one signing the repository.
    local key_url="https://repos.influxdata.com/influxdb.key"
    local keyring="/etc/apt/trusted.gpg.d/influxdb.gpg"
    # The repository has one tree per distribution, so use the detected ID
    # rather than always pointing Debian hosts at the Ubuntu tree.
    local repo_distro="${ID:-ubuntu}"
    local codename="${VERSION_CODENAME:-}"

    if [[ -z "$codename" ]]; then
      codename=$(lsb_release -cs)
    fi

    # gpg is needed to convert the armored key and is absent on minimal images.
    if ! command -v gpg &> /dev/null; then
      $PKG_INSTALL gnupg
    fi

    # Either downloader is acceptable; prefer wget, fall back to curl.
    {
      if command -v wget &> /dev/null; then
        wget -qO- "$key_url"
      else
        curl -fsSL "$key_url"
      fi
    } | gpg --dearmor | tee "$keyring" > /dev/null

    echo "deb [signed-by=$keyring] https://repos.influxdata.com/$repo_distro $codename stable" \
      | tee /etc/apt/sources.list.d/influxdb.list
    apt update
    $PKG_INSTALL influxdb
  else
    # The delimiter is quoted, so nothing is expanded and no escaping is
    # needed: $releasever and $basearch must reach the file verbatim for the
    # package manager to substitute them. (Backslashes would be written
    # literally and corrupt the URL.)
    cat > /etc/yum.repos.d/influxdb.repo << 'EOF'
[influxdb]
name = InfluxDB Repository - RHEL
baseurl = https://repos.influxdata.com/rhel/$releasever/$basearch/stable/
enabled = 1
gpgcheck = 1
gpgkey = https://repos.influxdata.com/influxdb.key
EOF
    $PKG_INSTALL influxdb
  fi

  systemctl enable influxdb
  systemctl start influxdb

  # Poll until the server answers instead of sleeping for a fixed time: the
  # steps that follow run queries and would fail against a server that is
  # still starting up.
  local waited=0
  until influx -execute "SHOW DATABASES" &> /dev/null; do
    if ((waited >= 30)); then
      echo -e "${RED}[ERROR]${NC} InfluxDB did not become ready within 30 seconds"
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done

  echo -e "${GREEN}[SUCCESS]${NC} InfluxDB installed and started"
}
#######################################
# Install the collectd packages (step 3 of 6).
# Globals:   PKG_MGR, PKG_INSTALL, GREEN, YELLOW, NC (read)
# Outputs:   progress messages plus the package manager's own output
# Returns:   non-zero if the collectd package cannot be installed
#######################################
install_collectd() {
  echo "[3/6] Installing collectd..."

  if [[ "$PKG_MGR" == "apt" ]]; then
    $PKG_INSTALL collectd collectd-utils
  else
    # On the enterprise-Linux family collectd is typically shipped in EPEL
    # rather than in the base repositories. When the package cannot be
    # resolved, try to enable EPEL first. This is best effort: the helper
    # package is not available on every supported distribution, so a failure
    # here only produces a warning and the real install below decides.
    if ! "$PKG_MGR" -q info collectd &> /dev/null; then
      echo -e "${YELLOW}[INFO]${NC} collectd is not available from the enabled repositories; trying EPEL"
      $PKG_INSTALL epel-release \
        || echo -e "${YELLOW}[WARNING]${NC} Could not enable EPEL; continuing"
    fi
    $PKG_INSTALL collectd
  fi

  echo -e "${GREEN}[SUCCESS]${NC} Collectd installed"
}
#######################################
# Prepare InfluxDB for collectd data (step 4 of 6): enable the collectd
# listener, then create the database, the user and the retention policy.
# Safe to run more than once.
# Globals:   INFLUXDB_DB, INFLUXDB_USER, INFLUXDB_PASS (read);
#            INFLUXDB_CONF (read, optional override of the config path);
#            GREEN, YELLOW, RED, NC (read)
# Outputs:   progress messages plus the output of the influx client
# Returns:   non-zero if InfluxDB does not come back after the restart or a
#            query fails
#######################################
configure_influxdb() {
  echo "[4/6] Configuring InfluxDB..."

  local conf_file="${INFLUXDB_CONF:-/etc/influxdb/influxdb.conf}"
  local marker="# collectd listener added by the monitoring install script"
  local tries=0

  # collectd ships its metrics with the network plugin to UDP port 25826.
  # Nothing listens there unless the collectd input is enabled, so without
  # this section no measurement ever reaches the database. The marker line
  # keeps re-runs from appending the section twice.
  if [[ ! -f "$conf_file" ]]; then
    echo -e "${YELLOW}[WARNING]${NC} $conf_file not found; collectd listener not enabled"
  elif ! grep -qF -- "$marker" "$conf_file"; then
    cp -- "$conf_file" "$conf_file.backup"
    cat >> "$conf_file" << EOF

$marker
[[collectd]]
  enabled = true
  bind-address = "127.0.0.1:25826"
  database = "$INFLUXDB_DB"
  retention-policy = ""
  typesdb = "/usr/share/collectd/types.db"
EOF
    systemctl restart influxdb

    # The queries below need a server that is accepting connections again.
    until influx -execute "SHOW DATABASES" &> /dev/null; do
      tries=$((tries + 1))
      if ((tries >= 30)); then
        echo -e "${RED}[ERROR]${NC} InfluxDB did not come back after the restart"
        return 1
      fi
      sleep 1
    done
  fi

  # InfluxQL string literals are single-quoted; escape backslashes and single
  # quotes so that a user-supplied password cannot break or alter the
  # statement.
  local bs='\' sq="'"
  local pass_literal=${INFLUXDB_PASS//"$bs"/"$bs$bs"}
  pass_literal=${pass_literal//"$sq"/"$bs$sq"}

  # Create database and user
  influx -execute "CREATE DATABASE $INFLUXDB_DB"
  if ! influx -execute "CREATE USER $INFLUXDB_USER WITH PASSWORD '$pass_literal'"; then
    # Presumably the user already exists from an earlier run with a different
    # password; bring the password in line instead of aborting.
    influx -execute "SET PASSWORD FOR $INFLUXDB_USER = '$pass_literal'"
  fi
  influx -execute "GRANT ALL ON $INFLUXDB_DB TO $INFLUXDB_USER"

  # Configure retention policy
  influx -execute "CREATE RETENTION POLICY \"30_days\" ON \"$INFLUXDB_DB\" DURATION 30d REPLICATION 1 DEFAULT"

  echo -e "${GREEN}[SUCCESS]${NC} InfluxDB configured"
}
#######################################
# Write the collectd configuration and start the service (step 5 of 6).
#
# Metrics are shipped with the network plugin to 127.0.0.1:25826 (UDP); the
# receiving side must have a collectd listener enabled on that port.
# No write_http node is configured: InfluxDB 1.x only accepts line protocol
# on /write, so posting collectd JSON there cannot deliver data, and the URL
# would place the database password in a world-readable (0644) file.
#
# Globals:   COLLECTD_CONFIG, GREEN, NC (read)
# Outputs:   progress messages on stdout
# Returns:   non-zero if the configuration cannot be written or the service
#            fails to start
#######################################
configure_collectd() {
  echo "[5/6] Configuring collectd..."

  local include_dir="/etc/collectd/collectd.conf.d"
  local plugin_dir="/usr/lib/collectd"
  local host_name

  # Some distributions install the plugins under lib64; pointing PluginDir at
  # a directory that does not exist makes every LoadPlugin fail.
  if [[ ! -d "$plugin_dir" && -d /usr/lib64/collectd ]]; then
    plugin_dir="/usr/lib64/collectd"
  fi
  host_name=$(hostname)

  # Keep the first (vendor) configuration: never overwrite an existing backup
  # on a re-run, and do not abort when the package shipped no config file.
  if [[ -f "$COLLECTD_CONFIG" && ! -e "$COLLECTD_CONFIG.backup" ]]; then
    cp -- "$COLLECTD_CONFIG" "$COLLECTD_CONFIG.backup"
  fi

  # Create include directory
  mkdir -p "$include_dir"

  # Create new collectd configuration
  cat > "$COLLECTD_CONFIG" << EOF
# Global settings
Hostname "$host_name"
FQDNLookup false
BaseDir "/var/lib/collectd"
PluginDir "$plugin_dir"
TypesDB "/usr/share/collectd/types.db"
AutoLoadPlugin false
CollectInternalStats true

# Logging
LoadPlugin syslog
<Plugin syslog>
  LogLevel info
</Plugin>

# Core plugins
LoadPlugin cpu
LoadPlugin df
LoadPlugin disk
LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin processes
LoadPlugin swap
LoadPlugin uptime

# CPU plugin configuration
<Plugin cpu>
  ReportByCpu true
  ReportByState true
  ValuesPercentage true
</Plugin>

# Disk plugin configuration
<Plugin df>
  MountPoint "/"
  MountPoint "/boot"
  MountPoint "/home"
  IgnoreSelected false
  ReportByDevice false
  ReportInodes true
  ValuesAbsolute true
  ValuesPercentage true
</Plugin>

# Memory plugin configuration
<Plugin memory>
  ValuesAbsolute true
  ValuesPercentage true
</Plugin>

# Ship metrics to the InfluxDB collectd listener
LoadPlugin network
<Plugin network>
  <Server "127.0.0.1" "25826">
    SecurityLevel None
  </Server>
</Plugin>

# Include additional configurations
Include "$include_dir"
EOF

  # Fall back to root when the distribution has no dedicated collectd account.
  chown -R collectd:collectd /etc/collectd 2>/dev/null || chown -R root:root /etc/collectd
  chmod 755 /etc/collectd
  chmod 644 "$COLLECTD_CONFIG"

  # Enable and start collectd
  systemctl enable collectd
  systemctl start collectd

  echo -e "${GREEN}[SUCCESS]${NC} Collectd configured and started"
}
#######################################
# Check that both services are running and that metrics reach the database,
# then print a summary of the installation (step 6 of 6).
# Globals:   INFLUXDB_DB, INFLUXDB_USER, INFLUXDB_PASS, COLLECTD_CONFIG,
#            RED, GREEN, YELLOW, NC (read)
# Outputs:   status messages and the summary (including the password) on
#            stdout
# Returns:   0 on success, also when no data has arrived yet (warning only);
#            exits with status 1 when a service is not running
#######################################
verify_installation() {
  echo "[6/6] Verifying installation..."

  # Check services
  if ! systemctl is-active --quiet influxdb; then
    echo -e "${RED}[ERROR]${NC} InfluxDB service is not running"
    exit 1
  fi
  if ! systemctl is-active --quiet collectd; then
    echo -e "${RED}[ERROR]${NC} Collectd service is not running"
    exit 1
  fi

  # Poll for the first measurements instead of always sleeping for the full
  # period; stop as soon as data shows up.
  echo "Waiting up to 30 seconds for data collection..."
  local measurements
  local waited=0
  local have_data=false
  while true; do
    # Capture the output first and search it afterwards. Piping straight into
    # `grep -q` lets grep exit on the first match, which can kill the
    # producer and, under pipefail, turn a successful match into a failure.
    measurements=$(influx -database "$INFLUXDB_DB" -execute "SHOW MEASUREMENTS" 2>/dev/null || true)
    if grep -q cpu <<< "$measurements"; then
      have_data=true
      break
    fi
    if ((waited >= 30)); then
      break
    fi
    sleep 5
    waited=$((waited + 5))
  done

  # Verify data in InfluxDB
  if [[ "$have_data" == true ]]; then
    echo -e "${GREEN}[SUCCESS]${NC} Data collection verified"
  else
    echo -e "${YELLOW}[WARNING]${NC} No data found yet, this may be normal for new installations"
  fi

  echo -e "${GREEN}[SUCCESS]${NC} Installation completed successfully!"
  echo ""
  echo "=== Configuration Details ==="
  echo "InfluxDB Database: $INFLUXDB_DB"
  echo "InfluxDB User: $INFLUXDB_USER"
  # Shown on purpose: this is the only place an auto-generated password is
  # reported to the operator.
  echo "InfluxDB Password: $INFLUXDB_PASS"
  echo "InfluxDB URL: http://localhost:8086"
  echo "Collectd Config: $COLLECTD_CONFIG"
  echo ""
  echo "You can query data using: influx -database $INFLUXDB_DB"
}
#######################################
# Entry point: print the banner, parse the options, then run the
# installation steps in their fixed order.
# Arguments: the script's command-line arguments
#######################################
main() {
  echo -e "${GREEN}Linux Performance Monitoring Setup${NC}"
  echo "Configuring collectd and InfluxDB 1.8..."
  echo ""

  parse_args "$@"

  # The order matters: each step builds on the ones before it.
  local stage
  for stage in \
    check_prerequisites \
    detect_distro \
    update_system \
    install_influxdb \
    install_collectd \
    configure_influxdb \
    configure_collectd \
    verify_installation; do
    "$stage"
  done
}

main "$@"
Review the script before running. The script must run as root, so execute it with: sudo bash install.sh