Learn to implement production-grade Envoy proxy with global and local rate limiting, circuit breakers, and health checks to build resilient microservices architecture that can handle traffic spikes and service failures gracefully.
Prerequisites
- Root or sudo access
- Docker and Docker Compose installed
- Basic understanding of microservices architecture
- Familiarity with HTTP/REST APIs
What this solves
Microservices architectures face challenges with cascading failures, traffic spikes, and service overload that can bring down entire systems. Envoy proxy provides advanced rate limiting and circuit breaker capabilities to prevent these issues by controlling traffic flow and isolating failing services. This tutorial shows you how to configure production-grade resilience patterns using Envoy's built-in features.
Step-by-step installation
Update system packages
Start by updating your package manager to ensure you get the latest security patches and dependencies.
sudo apt update && sudo apt upgrade -y
Install required dependencies
Install Docker and Docker Compose to run Envoy and sample microservices for testing our configuration.
sudo apt install -y apt-transport-https ca-certificates curl gnupg lsb-release
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list
sudo apt update
sudo apt install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
Start Docker service
Enable and start Docker to ensure it runs on boot and is available for our Envoy containers.
sudo systemctl enable --now docker
sudo usermod -aG docker $USER
newgrp docker
Create project directory structure
Set up organized directories for our Envoy configuration files and microservice definitions.
mkdir -p ~/envoy-resilience/{configs,services,logs}
cd ~/envoy-resilience
Configure Envoy with rate limiting and circuit breakers
Create the main Envoy configuration that includes global rate limiting, local rate limiting, and circuit breaker settings.
# Envoy edge-proxy configuration: local + global rate limiting, retries,
# circuit breakers, outlier detection, and active health checks.
static_resources:
  listeners:
    - name: main_listener
      address:
        socket_address:
          protocol: TCP
          address: 0.0.0.0
          port_value: 8080
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/envoy/access.log"
                http_filters:
                  # Local (per-Envoy-instance) token bucket: 100 req/min.
                  # filter_enabled/filter_enforced must both be 100% or the
                  # limiter is evaluated for 0% of requests.
                  - name: envoy.filters.http.local_ratelimit
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit
                      value:
                        stat_prefix: local_rate_limiter
                        token_bucket:
                          max_tokens: 100
                          tokens_per_fill: 100
                          fill_interval: 60s
                        filter_enabled:
                          runtime_key: local_rate_limit_enabled
                          default_value:
                            numerator: 100
                            denominator: HUNDRED
                        filter_enforced:
                          runtime_key: local_rate_limit_enforced
                          default_value:
                            numerator: 100
                            denominator: HUNDRED
                  # Global limit: descriptors are sent to the external
                  # ratelimit gRPC service. failure_mode_deny: false means
                  # "fail open" when the ratelimit service is unreachable.
                  - name: envoy.filters.http.ratelimit
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.ratelimit.v3.RateLimit
                      domain: edge_proxy_ratelimit
                      stage: 0
                      request_type: both
                      timeout: 0.25s
                      failure_mode_deny: false
                      rate_limit_service:
                        grpc_service:
                          envoy_grpc:
                            cluster_name: ratelimit_cluster
                        transport_api_version: V3
                  # The router must be the last HTTP filter.
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: backend_services
                      domains: ["*"]
                      routes:
                        - match:
                            prefix: "/api/v1/users"
                          route:
                            cluster: user_service
                            timeout: 5s
                            retry_policy:
                              retry_on: 5xx,gateway-error,connect-failure,refused-stream
                              num_retries: 3
                              per_try_timeout: 2s
                              retry_back_off:
                                base_interval: 0.25s
                                max_interval: 2s
                        - match:
                            prefix: "/api/v1/orders"
                          route:
                            cluster: order_service
                            timeout: 10s
                            retry_policy:
                              retry_on: 5xx,gateway-error,connect-failure,refused-stream
                              num_retries: 2
                              per_try_timeout: 5s
                      # Descriptors emitted for every request on this vhost
                      # and matched against the ratelimit service config.
                      rate_limits:
                        - actions:
                            - request_headers:
                                header_name: "x-forwarded-for"
                                descriptor_key: "remote_address"
                        - actions:
                            - generic_key:
                                descriptor_value: "global_rate_limit"
  clusters:
    - name: user_service
      connect_timeout: 2s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      circuit_breakers:
        thresholds:
          - priority: DEFAULT
            max_connections: 50
            max_pending_requests: 30
            max_requests: 100
            max_retries: 10
            retry_budget:
              budget_percent:
                value: 25.0
              min_retry_concurrency: 5
          - priority: HIGH
            max_connections: 10
            max_pending_requests: 5
            max_requests: 20
            max_retries: 3
      outlier_detection:
        consecutive_5xx: 3
        consecutive_gateway_failure: 3
        interval: 10s
        base_ejection_time: 30s
        max_ejection_percent: 50
      # "min_health_percent" is not a valid outlier_detection field (Envoy
      # rejects configs with unknown fields); the equivalent intent is the
      # cluster-level healthy panic threshold.
      common_lb_config:
        healthy_panic_threshold:
          value: 30.0
      health_checks:
        - timeout: 1s
          interval: 5s
          unhealthy_threshold: 3
          healthy_threshold: 2
          http_health_check:
            path: "/health"
            expected_statuses:
              # Int64Range is half-open [start, end): end 300 covers all 2xx.
              # (end: 299 would silently exclude 299.)
              - start: 200
                end: 300
      load_assignment:
        cluster_name: user_service
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: user_service
                      port_value: 3000
    - name: order_service
      connect_timeout: 3s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: LEAST_REQUEST
      circuit_breakers:
        thresholds:
          - priority: DEFAULT
            max_connections: 30
            max_pending_requests: 20
            max_requests: 60
            max_retries: 5
            retry_budget:
              budget_percent:
                value: 20.0
              min_retry_concurrency: 3
      outlier_detection:
        consecutive_5xx: 5
        consecutive_gateway_failure: 5
        interval: 15s
        base_ejection_time: 60s
        max_ejection_percent: 30
      # See note on user_service: healthy panic threshold replaces the
      # invalid "min_health_percent" outlier_detection field.
      common_lb_config:
        healthy_panic_threshold:
          value: 50.0
      health_checks:
        - timeout: 2s
          interval: 10s
          unhealthy_threshold: 3
          healthy_threshold: 2
          http_health_check:
            path: "/health"
      load_assignment:
        cluster_name: order_service
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: order_service
                      port_value: 3000
    - name: ratelimit_cluster
      connect_timeout: 1s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      # gRPC needs an HTTP/2 upstream; this is the modern replacement for the
      # deprecated cluster-level http2_protocol_options field.
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: ratelimit_cluster
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: ratelimit_service
                      port_value: 8081
admin:
  address:
    socket_address:
      protocol: TCP
      address: 0.0.0.0
      port_value: 9901
Configure global rate limiting service
Create the rate limiting configuration that defines specific limits for different request patterns and client types.
# Descriptor configuration for the external ratelimit service.
# "domain" must match the domain set in Envoy's ratelimit HTTP filter.
domain: edge_proxy_ratelimit
descriptors:
  # Global rate limit: 1000 requests per minute
  - key: generic_key
    value: "global_rate_limit"
    rate_limit:
      unit: minute
      requests_per_unit: 1000
  # Per-IP rate limit: 100 requests per minute
  - key: remote_address
    rate_limit:
      unit: minute
      requests_per_unit: 100
  # Specific endpoint limits.
  # NOTE(review): the header_match and premium_user descriptors below are only
  # evaluated if Envoy emits matching rate_limits actions; the envoy.yaml in
  # this tutorial does not emit them yet — confirm before relying on them.
  - key: header_match
    value: "api_v1_users"
    descriptors:
      - key: remote_address
        rate_limit:
          unit: minute
          requests_per_unit: 50
  # Premium users get higher limits
  - key: header_match
    value: "premium_user"
    descriptors:
      - key: remote_address
        rate_limit:
          unit: minute
          requests_per_unit: 500
  # Burst protection for critical endpoints
  - key: generic_key
    value: "burst_protection"
    rate_limit:
      unit: second
      requests_per_unit: 10
Create sample microservices
Set up simple Node.js services to test our Envoy configuration with realistic backend behavior including health checks.
const express = require('express');
const app = express();
const port = 3000;
let requestCount = 0;
let isHealthy = true;
app.use(express.json());
// Health check endpoint
app.get('/health', (req, res) => {
if (isHealthy) {
res.status(200).json({ status: 'healthy', service: 'user-service', uptime: process.uptime() });
} else {
res.status(503).json({ status: 'unhealthy', service: 'user-service' });
}
});
// Simulate user endpoints
app.get('/api/v1/users', (req, res) => {
requestCount++;
// Simulate occasional failures to test circuit breakers
if (requestCount % 20 === 0) {
return res.status(500).json({ error: 'Simulated server error' });
}
// Simulate slow responses
const delay = Math.random() * 1000;
setTimeout(() => {
res.json({
users: [
{ id: 1, name: 'Alice', email: 'alice@example.com' },
{ id: 2, name: 'Bob', email: 'bob@example.com' }
],
requestId: requestCount,
processingTime: delay
});
}, delay);
});
app.get('/api/v1/users/:id', (req, res) => {
requestCount++;
const userId = parseInt(req.params.id);
if (userId > 1000) {
return res.status(404).json({ error: 'User not found' });
}
res.json({
id: userId,
name: User${userId},
email: user${userId}@example.com,
requestId: requestCount
});
});
// Admin endpoint to control health
app.post('/admin/health', (req, res) => {
isHealthy = req.body.healthy !== false;
res.json({ status: isHealthy ? 'healthy' : 'unhealthy' });
});
app.listen(port, '0.0.0.0', () => {
console.log(User service listening on port ${port});
});Create order service
Build a second microservice with different performance characteristics to demonstrate cluster-specific circuit breaker settings.
const express = require('express');
const app = express();
const port = 3000;
let requestCount = 0;
let isHealthy = true;
app.use(express.json());
// Health check endpoint
app.get('/health', (req, res) => {
if (isHealthy) {
res.status(200).json({ status: 'healthy', service: 'order-service', uptime: process.uptime() });
} else {
res.status(503).json({ status: 'unhealthy', service: 'order-service' });
}
});
// Order endpoints with heavier processing simulation
app.get('/api/v1/orders', (req, res) => {
requestCount++;
// Simulate database-heavy operations with longer delays
const delay = Math.random() * 3000 + 500; // 500ms to 3.5s
// Simulate more frequent failures for this service
if (requestCount % 15 === 0) {
return res.status(503).json({ error: 'Database connection timeout' });
}
if (requestCount % 25 === 0) {
return res.status(500).json({ error: 'Internal processing error' });
}
setTimeout(() => {
res.json({
orders: [
{ id: 1001, userId: 1, total: 99.99, status: 'shipped' },
{ id: 1002, userId: 2, total: 149.99, status: 'processing' }
],
requestId: requestCount,
processingTime: delay,
timestamp: new Date().toISOString()
});
}, delay);
});
app.post('/api/v1/orders', (req, res) => {
requestCount++;
// Simulate order creation with validation
const { userId, items, total } = req.body;
if (!userId || !items || !total) {
return res.status(400).json({ error: 'Missing required fields' });
}
// Simulate processing delay
setTimeout(() => {
res.status(201).json({
orderId: 1000 + requestCount,
userId,
items,
total,
status: 'created',
requestId: requestCount
});
}, Math.random() * 2000 + 1000); // 1-3 second processing time
});
app.post('/admin/health', (req, res) => {
isHealthy = req.body.healthy !== false;
res.json({ status: isHealthy ? 'healthy' : 'unhealthy' });
});
app.listen(port, '0.0.0.0', () => {
console.log(Order service listening on port ${port});
});Create package.json for services
Define the Node.js dependencies for our microservices with minimal required packages.
{
"name": "envoy-microservices",
"version": "1.0.0",
"description": "Sample microservices for Envoy resilience testing",
"main": "user-service.js",
"scripts": {
"start:user": "node user-service.js",
"start:order": "node order-service.js"
},
"dependencies": {
"express": "^4.18.2"
},
"keywords": ["microservices", "envoy", "nodejs"],
"author": "Infrastructure Team",
"license": "MIT"
}
Create Docker Compose configuration
Define the complete multi-service setup including Envoy, rate limiting service, Redis backend, and our microservices.
# Compose stack: Envoy edge proxy, external ratelimit service + Redis backend,
# and the two sample Node services.
# NOTE: "version" is obsolete in Compose v2 and ignored; kept for older tools.
version: '3.8'

services:
  envoy:
    image: envoyproxy/envoy:v1.28-latest
    ports:
      - "8080:8080"
      - "9901:9901"
    volumes:
      - ./configs/envoy.yaml:/etc/envoy/envoy.yaml:ro
      - ./logs:/var/log/envoy
    command: /usr/local/bin/envoy -c /etc/envoy/envoy.yaml --service-cluster envoy-proxy --service-node envoy-node --log-level info
    depends_on:
      - user_service
      - order_service
      - ratelimit_service
    networks:
      - envoy_network

  ratelimit_service:
    image: envoyproxy/ratelimit:master
    ports:
      - "8081:8081"
      - "6070:6070"
    environment:
      - USE_STATSD=false
      - LOG_LEVEL=debug
      - REDIS_SOCKET_TYPE=tcp
      - REDIS_URL=redis:6379
      - RUNTIME_ROOT=/data
      - RUNTIME_SUBDIRECTORY=ratelimit
      - RUNTIME_WATCH_ROOT=false
    volumes:
      # Ratelimit loads descriptors from $RUNTIME_ROOT/$RUNTIME_SUBDIRECTORY/config/
      - ./configs/ratelimit-config.yaml:/data/ratelimit/config/config.yaml:ro
    depends_on:
      - redis
    networks:
      - envoy_network

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    networks:
      - envoy_network

  user_service:
    build:
      context: ./services
      # dockerfile_inline requires Docker Compose v2.17 or newer.
      dockerfile_inline: |
        FROM node:18-alpine
        WORKDIR /app
        COPY package*.json ./
        RUN npm install --production
        COPY user-service.js ./
        EXPOSE 3000
        USER node
        CMD ["node", "user-service.js"]
    ports:
      - "3001:3000"
    environment:
      - NODE_ENV=production
      - SERVICE_NAME=user-service
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
      interval: 10s
      timeout: 3s
      retries: 3
    networks:
      - envoy_network

  order_service:
    build:
      context: ./services
      dockerfile_inline: |
        FROM node:18-alpine
        WORKDIR /app
        COPY package*.json ./
        RUN npm install --production
        COPY order-service.js ./
        EXPOSE 3000
        USER node
        CMD ["node", "order-service.js"]
    ports:
      - "3002:3000"
    environment:
      - NODE_ENV=production
      - SERVICE_NAME=order-service
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3000/health"]
      interval: 15s
      timeout: 5s
      retries: 3
    networks:
      - envoy_network

volumes:
  redis_data:

networks:
  envoy_network:
    driver: bridge
Start the complete service stack
Launch all services using Docker Compose and verify they start correctly with proper networking.
cd ~/envoy-resilience
docker compose up -d
docker compose logs -f --tail=50
Create load testing script
Build a comprehensive test script to validate rate limiting and circuit breaker functionality under various load conditions.
#!/bin/bash
# test-resilience.sh — exercises Envoy's rate limiting, circuit breaking,
# outlier detection and health checking against the running compose stack.
# Requires: curl, jq.
set -e

ENVOY_URL="http://localhost:8080"
ADMIN_URL="http://localhost:9901"

echo "=== Envoy Resilience Testing Script ==="
echo "Testing rate limiting and circuit breakers..."

# Function to test basic connectivity
test_connectivity() {
  echo -e "\n1. Testing basic connectivity..."
  echo "User service health:"
  curl -s "${ENVOY_URL}/api/v1/users" | jq '.' || echo "Request failed"
  echo -e "\nOrder service health:"
  curl -s "${ENVOY_URL}/api/v1/orders" | jq '.' || echo "Request failed"
  echo -e "\nEnvoy admin stats:"
  # "|| true" keeps set -e from aborting when grep finds no match.
  curl -s "${ADMIN_URL}/stats" | grep -E "(ratelimit|circuit_breakers|health_check)" | head -10 || true
}

# Function to test local rate limiting
test_local_rate_limiting() {
  echo -e "\n2. Testing local rate limiting (100 requests/minute)..."
  echo "Sending 10 rapid requests:"
  for i in {1..10}; do
    status=$(curl -s -o /dev/null -w "%{http_code}" "${ENVOY_URL}/api/v1/users")
    echo "Request $i: HTTP $status"
    sleep 0.1
  done
  echo -e "\nLocal rate limit stats:"
  curl -s "${ADMIN_URL}/stats" | grep local_rate_limit || true
}

# Function to test global rate limiting
test_global_rate_limiting() {
  echo -e "\n3. Testing global rate limiting..."
  echo "Sending requests with different IPs (simulated):"
  for i in {1..5}; do
    status=$(curl -s -o /dev/null -w "%{http_code}" \
      -H "X-Forwarded-For: 203.0.113.$i" \
      "${ENVOY_URL}/api/v1/users")
    echo "Request from IP 203.0.113.$i: HTTP $status"
  done
  echo -e "\nGlobal rate limit stats:"
  curl -s "${ADMIN_URL}/stats" | grep ratelimit | head -5 || true
}

# Function to test circuit breakers
test_circuit_breakers() {
  echo -e "\n4. Testing circuit breakers..."
  echo "Making service unhealthy to trigger circuit breaker:"
  curl -s -X POST -H "Content-Type: application/json" \
    -d '{"healthy":false}' \
    "http://localhost:3001/admin/health"
  echo -e "\nWaiting for health check to detect unhealthy state..."
  sleep 15
  echo -e "\nSending requests to trigger circuit breaker:"
  for i in {1..5}; do
    status=$(curl -s -o /dev/null -w "%{http_code}" "${ENVOY_URL}/api/v1/users")
    echo "Request $i: HTTP $status"
    sleep 1
  done
  echo -e "\nCircuit breaker stats:"
  curl -s "${ADMIN_URL}/stats" | grep -E "(cx_open|rq_pending_open|rq_retry_open)" || true
  echo -e "\nRestoring service health:"
  curl -s -X POST -H "Content-Type: application/json" \
    -d '{"healthy":true}' \
    "http://localhost:3001/admin/health"
}

# Function to test outlier detection
test_outlier_detection() {
  echo -e "\n5. Testing outlier detection..."
  echo "Current cluster stats:"
  curl -s "${ADMIN_URL}/stats" | grep -E "(outlier_detection|health_check)" | head -8 || true
  echo -e "\nCluster health status:"
  curl -s "${ADMIN_URL}/clusters" | grep -A 10 -B 5 "user_service" || true
}

# Function to show comprehensive stats
show_stats() {
  echo -e "\n6. Comprehensive Envoy statistics:"
  echo -e "\n=== Rate Limiting Stats ==="
  curl -s "${ADMIN_URL}/stats" | grep ratelimit || true
  echo -e "\n=== Circuit Breaker Stats ==="
  curl -s "${ADMIN_URL}/stats" | grep circuit_breakers || true
  echo -e "\n=== Health Check Stats ==="
  curl -s "${ADMIN_URL}/stats" | grep health_check || true
  echo -e "\n=== Upstream Stats ==="
  curl -s "${ADMIN_URL}/stats" | grep -E "(user_service|order_service)" | head -10 || true
  echo -e "\n=== Overall Request Stats ==="
  curl -s "${ADMIN_URL}/stats" | grep -E "(downstream_rq_|upstream_rq_)" | head -10 || true
}

# Main execution
echo "Waiting for services to be ready..."
sleep 10
test_connectivity
test_local_rate_limiting
test_global_rate_limiting
test_circuit_breakers
test_outlier_detection
show_stats
echo -e "\n=== Testing Complete ==="
echo "Check Envoy admin interface at: ${ADMIN_URL}"
echo "View service logs with: docker compose logs -f"
Make test script executable and run tests
Set proper permissions and execute the comprehensive testing script to verify all resilience features.
chmod +x ~/envoy-resilience/test-resilience.sh
./test-resilience.sh
Configure monitoring and observability
Create monitoring dashboard configuration
Set up Prometheus metrics collection to monitor Envoy's rate limiting and circuit breaker behavior in production.
# Prometheus scrape configuration for the Envoy resilience stack.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # Envoy exposes Prometheus-format metrics on its admin port.
  - job_name: 'envoy-proxy'
    static_configs:
      - targets: ['envoy:9901']
    metrics_path: '/stats/prometheus'
    scrape_interval: 10s
  - job_name: 'ratelimit-service'
    static_configs:
      - targets: ['ratelimit_service:6070']
    scrape_interval: 15s
  # NOTE(review): the sample Node services in this tutorial do not implement a
  # /metrics endpoint, so these two jobs will report "down" until one is added.
  - job_name: 'user-service'
    static_configs:
      - targets: ['user_service:3000']
    metrics_path: '/metrics'
    scrape_interval: 30s
  - job_name: 'order-service'
    static_configs:
      - targets: ['order_service:3000']
    metrics_path: '/metrics'
    scrape_interval: 30s
Add monitoring to Docker Compose
Extend your Docker Compose configuration to include Prometheus for metrics collection and monitoring.
# Appending with ">>" would place a top-level "prometheus:" key after the
# "networks:" section of docker-compose.yml, producing an invalid Compose
# file. Instead, write a docker-compose.override.yml — Docker Compose merges
# it with docker-compose.yml automatically on every "docker compose" command.
cat > ~/envoy-resilience/docker-compose.override.yml << 'EOF'
services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./configs/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=15d'
      - '--web.enable-lifecycle'
    networks:
      - envoy_network
EOF
Verify your setup
Confirm that all components are working correctly and resilience features are properly configured.
# Check all services are running
docker compose ps
# Test Envoy admin interface
curl -s http://localhost:9901/stats | grep -E "(ratelimit|circuit_breakers|health_check)" | head -10
# Test rate limiting
for i in {1..5}; do curl -s -o /dev/null -w "HTTP %{http_code}\n" http://localhost:8080/api/v1/users; done
# Check circuit breaker stats
curl -s http://localhost:9901/stats | grep circuit_breakers
# Verify health checks
curl -s http://localhost:9901/clusters | grep health_flags
# Test service endpoints
curl -s http://localhost:8080/api/v1/users | jq '.'
curl -s http://localhost:8080/api/v1/orders | jq '.'
Use docker compose logs envoy and docker network inspect envoy-resilience_envoy_network to troubleshoot networking.
Advanced configuration options
For production deployments, you can enhance your Envoy configuration with additional resilience patterns. Consider implementing custom health check endpoints that verify database connectivity and downstream service health. You can also configure different circuit breaker thresholds for different request types using route-specific configurations.
The rate limiting service can be extended with dynamic configuration updates through the runtime layer, allowing you to adjust limits without restarting Envoy. Additionally, integrating with service mesh monitoring provides comprehensive observability across your microservices architecture.
For enhanced security and traffic management, combine these resilience patterns with service mesh authentication and implement distributed tracing to understand request flow patterns during circuit breaker activations.
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Rate limiting not working | Redis connection failure | docker compose logs ratelimit_service and check Redis connectivity |
| Circuit breaker always open | Health check misconfiguration | Verify health check endpoints return 200 status: curl http://localhost:3001/health |
| High latency despite circuit breakers | Timeout values too high | Reduce timeout and per_try_timeout in cluster configuration |
| Services not accessible | Network connectivity issues | Check Docker networks: docker network ls and service logs |
| Rate limits too restrictive | Token bucket configuration | Increase max_tokens and adjust tokens_per_fill values |
| Outlier detection not ejecting | Thresholds too permissive | Lower consecutive_5xx and consecutive_gateway_failure values |
Next steps
- Configure Grafana dashboards for monitoring Envoy metrics
- Set up comprehensive monitoring stack with Prometheus and Grafana
- Implement JWT authentication and OAuth2 integration with Envoy
- Configure advanced gRPC load balancing with Envoy
- Deploy Envoy-based service mesh in Kubernetes production environment
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# install.sh — automated setup for the Envoy resilience tutorial.
set -euo pipefail

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Global variables
PROJECT_DIR="$HOME/envoy-resilience"
TOTAL_STEPS=8

# Print usage and exit. Optional first argument is the exit status
# (defaults to 1, i.e. misuse).
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo "  -h, --help    Show this help message"
  echo "  -d, --dir     Project directory (default: ~/envoy-resilience)"
  exit "${1:-1}"
}

# Parse arguments
while [[ $# -gt 0 ]]; do
  case $1 in
    -h|--help)
      usage 0   # asking for help is not an error
      ;;
    -d|--dir)
      # ${2:?...} aborts with a clear message if the value is missing,
      # instead of an opaque "unbound variable" error from set -u.
      PROJECT_DIR="${2:?Error: --dir requires a value}"
      shift 2
      ;;
    *)
      echo -e "${RED}Error: Unknown option $1${NC}"
      usage
      ;;
  esac
done
# Error handling and cleanup: remove the project directory if any later step
# fails. WARNING: this deletes $PROJECT_DIR wholesale — do not point --dir at
# a directory that already contains data you care about.
cleanup() {
  if [ $? -ne 0 ]; then
    echo -e "${RED}Installation failed. Cleaning up...${NC}"
    cd "$HOME"
    rm -rf "$PROJECT_DIR"
  fi
}
trap cleanup ERR

# Check prerequisites
if [ "$EUID" -eq 0 ]; then
  echo -e "${RED}Error: Do not run this script as root${NC}"
  exit 1
fi
if ! command -v sudo &> /dev/null; then
  echo -e "${RED}Error: sudo is required but not installed${NC}"
  exit 1
fi

# Auto-detect distribution
if [ -f /etc/os-release ]; then
  . /etc/os-release
  case "$ID" in
    ubuntu|debian)
      PKG_MGR="apt"
      PKG_UPDATE="sudo apt update && sudo apt upgrade -y"
      PKG_INSTALL="sudo apt install -y"
      GPG_DIR="/usr/share/keyrings"
      # Use $ID in the repo URL so Debian hosts get the Debian repository
      # (the original hard-coded "ubuntu" for both), and detect the real
      # architecture instead of hard-coding amd64.
      DOCKER_REPO="deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/$ID"
      LSB_RELEASE_CMD="lsb_release -cs"
      ;;
    almalinux|rocky|centos|rhel|ol)
      PKG_MGR="dnf"
      PKG_UPDATE="sudo dnf update -y"
      PKG_INSTALL="sudo dnf install -y"
      DOCKER_REPO_SETUP="sudo dnf install -y yum-utils && sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo"
      ;;
    fedora)
      PKG_MGR="dnf"
      PKG_UPDATE="sudo dnf update -y"
      PKG_INSTALL="sudo dnf install -y"
      DOCKER_REPO_SETUP="sudo dnf install -y yum-utils && sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo"
      ;;
    amzn)
      PKG_MGR="yum"
      PKG_UPDATE="sudo yum update -y"
      PKG_INSTALL="sudo yum install -y"
      DOCKER_REPO_SETUP="sudo yum install -y yum-utils && sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo"
      ;;
    *)
      echo -e "${RED}Error: Unsupported distribution: $ID${NC}"
      exit 1
      ;;
  esac
else
  echo -e "${RED}Error: Cannot detect distribution${NC}"
  exit 1
fi
echo -e "${GREEN}[1/$TOTAL_STEPS] Updating system packages...${NC}"
eval "$PKG_UPDATE"

echo -e "${GREEN}[2/$TOTAL_STEPS] Installing dependencies...${NC}"
if [ "$PKG_MGR" = "apt" ]; then
  $PKG_INSTALL apt-transport-https ca-certificates curl gnupg lsb-release
  sudo mkdir -p "$GPG_DIR"
  # Use $ID (ubuntu or debian, set by /etc/os-release) so the signing key
  # matches the configured repo; --batch --yes lets gpg overwrite an existing
  # keyring when the script is re-run.
  curl -fsSL "https://download.docker.com/linux/$ID/gpg" | sudo gpg --dearmor --batch --yes -o "$GPG_DIR/docker-archive-keyring.gpg"
  echo "$DOCKER_REPO $(eval $LSB_RELEASE_CMD) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
  sudo apt update
else
  eval "$DOCKER_REPO_SETUP"
fi

echo -e "${GREEN}[3/$TOTAL_STEPS] Installing Docker...${NC}"
$PKG_INSTALL docker-ce docker-ce-cli containerd.io docker-compose-plugin

echo -e "${GREEN}[4/$TOTAL_STEPS] Configuring Docker service...${NC}"
sudo systemctl enable --now docker
sudo usermod -aG docker "$USER"

echo -e "${GREEN}[5/$TOTAL_STEPS] Creating project structure...${NC}"
mkdir -p "$PROJECT_DIR"/{configs,services,logs}
cd "$PROJECT_DIR"
echo -e "${GREEN}[6/$TOTAL_STEPS] Creating Envoy configuration...${NC}"
# Minimal Envoy config: local rate limiting plus circuit breakers and outlier
# detection in front of a single nginx backend.
cat > configs/envoy.yaml << 'EOF'
static_resources:
  listeners:
    - name: main_listener
      address:
        socket_address:
          protocol: TCP
          address: 0.0.0.0
          port_value: 8080
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                access_log:
                  - name: envoy.access_loggers.file
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
                      path: "/var/log/envoy/access.log"
                http_filters:
                  - name: envoy.filters.http.local_ratelimit
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit
                      value:
                        stat_prefix: local_rate_limiter
                        token_bucket:
                          max_tokens: 100
                          tokens_per_fill: 100
                          fill_interval: 60s
                        # Without these two stanzas the local rate limiter is
                        # evaluated for 0% of requests and never limits.
                        filter_enabled:
                          default_value:
                            numerator: 100
                            denominator: HUNDRED
                        filter_enforced:
                          default_value:
                            numerator: 100
                            denominator: HUNDRED
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: backend
                      domains: ["*"]
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            cluster: backend_service
  clusters:
    - name: backend_service
      connect_timeout: 0.25s
      type: LOGICAL_DNS
      lb_policy: ROUND_ROBIN
      circuit_breakers:
        thresholds:
          - priority: DEFAULT
            max_connections: 50
            max_pending_requests: 10
            max_requests: 100
            max_retries: 3
      outlier_detection:
        consecutive_5xx: 3
        interval: 30s
        base_ejection_time: 30s
        max_ejection_percent: 50
      load_assignment:
        cluster_name: backend_service
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: backend
                      port_value: 3000
EOF
echo -e "${GREEN}[7/$TOTAL_STEPS] Creating Docker Compose configuration...${NC}"
# Two-service stack: Envoy in front of a minimal nginx backend.
cat > docker-compose.yml << 'EOF'
version: '3.8'
services:
  envoy:
    image: envoyproxy/envoy:v1.28-latest
    container_name: envoy-proxy
    ports:
      - "8080:8080"
      - "9901:9901"
    volumes:
      - ./configs/envoy.yaml:/etc/envoy/envoy.yaml:ro
      - ./logs:/var/log/envoy
    command: ["envoy", "-c", "/etc/envoy/envoy.yaml", "--service-cluster", "front-proxy"]
    depends_on:
      - backend
  backend:
    image: nginx:alpine
    container_name: backend-service
    volumes:
      - ./services/nginx.conf:/etc/nginx/nginx.conf:ro
    expose:
      - "3000"
EOF
# Minimal nginx backend answering on port 3000, with a /health endpoint for
# Envoy's health checks.
mkdir -p services
cat > services/nginx.conf << 'EOF'
events {
    worker_connections 1024;
}
http {
    server {
        listen 3000;
        location / {
            return 200 "Backend service response\n";
            add_header Content-Type text/plain;
        }
        location /health {
            return 200 "OK\n";
            add_header Content-Type text/plain;
        }
    }
}
EOF

# Set proper permissions
chmod 755 "$PROJECT_DIR" "$PROJECT_DIR"/{configs,services,logs}
chmod 644 "$PROJECT_DIR"/configs/envoy.yaml "$PROJECT_DIR"/docker-compose.yml "$PROJECT_DIR"/services/nginx.conf
echo -e "${GREEN}[8/$TOTAL_STEPS] Running verification checks...${NC}"

# Verify Docker installation
if ! docker --version > /dev/null 2>&1; then
  echo -e "${RED}Error: Docker installation failed${NC}"
  exit 1
fi

# Verify Docker Compose: prefer the v2 plugin, fall back to the standalone
# docker-compose binary if present.
if ! docker compose version > /dev/null 2>&1; then
  echo -e "${YELLOW}Warning: docker compose command not available, trying docker-compose${NC}"
  if ! command -v docker-compose > /dev/null 2>&1; then
    echo -e "${RED}Error: Docker Compose not available${NC}"
    exit 1
  fi
fi

# Verify generated configuration files exist
if [ ! -f "$PROJECT_DIR/configs/envoy.yaml" ] || [ ! -f "$PROJECT_DIR/docker-compose.yml" ]; then
  echo -e "${RED}Error: Configuration files not created properly${NC}"
  exit 1
fi

echo -e "${GREEN}Installation completed successfully!${NC}"
echo ""
echo -e "${YELLOW}Next steps:${NC}"
echo "1. Log out and back in to apply Docker group membership"
echo "2. cd $PROJECT_DIR"
echo "3. docker compose up -d"
echo "4. Test rate limiting: curl -i http://localhost:8080/"
echo "5. Monitor Envoy admin: http://localhost:9901/"
echo ""
echo -e "${YELLOW}Configuration files created:${NC}"
echo "- Envoy config: $PROJECT_DIR/configs/envoy.yaml"
echo "- Docker Compose: $PROJECT_DIR/docker-compose.yml"
echo "- Backend config: $PROJECT_DIR/services/nginx.conf"
Review the script before running. Execute with: bash install.sh