Deploy production-grade Apache Airflow with Kubernetes Executor for dynamic workflow scaling. Configure PostgreSQL backend, RBAC authentication, and auto-scaling policies with Prometheus monitoring integration.
Prerequisites
- At least 8GB RAM
- 4 CPU cores
- 50GB disk space
- Root or sudo access
- Basic Kubernetes knowledge
What this solves
Apache Airflow with Kubernetes Executor provides dynamic scaling for data workflows by creating pods on-demand for each task execution. This setup eliminates resource waste from idle workers while handling variable workloads efficiently. You get built-in fault tolerance, resource isolation per task, and seamless integration with existing Kubernetes infrastructure for production data pipeline orchestration.
Step-by-step installation
Update system packages
Start by updating your package manager to ensure you get the latest versions of all dependencies.
sudo apt update && sudo apt upgrade -y
sudo apt install -y curl wget gnupg software-properties-common
Install Docker and container runtime
Kubernetes requires a container runtime. Install Docker as the container engine for running Airflow worker pods.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list
sudo apt update
sudo apt install -y docker-ce docker-ce-cli containerd.io
sudo systemctl enable --now docker
sudo usermod -aG docker $USER
Install Kubernetes cluster
Install kubeadm, kubelet, and kubectl for managing the Kubernetes cluster that will host Airflow workers.
# /etc/apt/keyrings does not exist on older releases; create it before
# writing the keyring there or the curl|gpg pipeline fails.
sudo mkdir -p -m 755 /etc/apt/keyrings
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.31/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.31/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
sudo apt update
sudo apt install -y kubelet kubeadm kubectl
# Pin the versions so routine upgrades cannot skew cluster components.
sudo apt-mark hold kubelet kubeadm kubectl
Initialize Kubernetes cluster
Initialize the Kubernetes control plane and configure the cluster for Airflow deployment.
sudo kubeadm init --pod-network-cidr=10.244.0.0/16
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Install Calico CNI network plugin
Install Calico for pod networking and network policy enforcement in the Kubernetes cluster. Note that Calico's custom-resources.yaml defaults to the 192.168.0.0/16 pod CIDR; download and edit it to match the 10.244.0.0/16 CIDR passed to kubeadm init before applying.
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.28.1/manifests/tigera-operator.yaml
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.28.1/manifests/custom-resources.yaml
kubectl taint nodes --all node-role.kubernetes.io/control-plane-
Install PostgreSQL for Airflow metadata
Deploy PostgreSQL as the Airflow metadata database backend with persistent storage. Save the manifest below as airflow-postgres.yaml; it is applied in a later step.
# PostgreSQL metadata database for Airflow: credentials, persistent storage,
# deployment, and an in-cluster Service. Requires the "airflow" namespace.
apiVersion: v1
kind: Secret
metadata:
  name: postgres-secret
  namespace: airflow
type: Opaque
data:
  postgres-password: YWlyZmxvd3Bhc3N3b3Jk  # "airflowpassword" base64-encoded
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: postgres-pvc
  namespace: airflow
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres
  namespace: airflow
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postgres
  template:
    metadata:
      labels:
        app: postgres
    spec:
      containers:
        - name: postgres
          image: postgres:17
          env:
            - name: POSTGRES_DB
              value: "airflow"
            - name: POSTGRES_USER
              value: "airflow"
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgres-secret
                  key: postgres-password
            # Point PGDATA below the mount root: initdb refuses to run in a
            # non-empty directory, and many volume types create lost+found
            # at the root of the mount.
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          ports:
            - containerPort: 5432
          volumeMounts:
            - name: postgres-storage
              mountPath: /var/lib/postgresql/data
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
      volumes:
        - name: postgres-storage
          persistentVolumeClaim:
            claimName: postgres-pvc
---
apiVersion: v1
kind: Service
metadata:
  name: postgres-service
  namespace: airflow
spec:
  selector:
    app: postgres
  ports:
    - port: 5432
      targetPort: 5432
  type: ClusterIP
Create Airflow namespace and RBAC
Create the airflow namespace and configure role-based access control for Airflow components. Save the manifest below as airflow-rbac.yaml; it is applied in the next step.
kubectl create namespace airflow
# Service account and cluster-wide permissions the Airflow scheduler uses to
# launch, watch, and clean up per-task worker pods.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: airflow
  namespace: airflow
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: airflow-cluster-role
rules:
  # KubernetesExecutor creates one pod per task, then watches and deletes it.
  - apiGroups: [""]
    resources: ["pods", "pods/log", "pods/exec"]
    verbs: ["create", "get", "list", "watch", "delete", "patch"]
  # Workers read mounted config and secrets.
  - apiGroups: [""]
    resources: ["secrets", "configmaps"]
    verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: airflow-cluster-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: airflow-cluster-role
subjects:
  - kind: ServiceAccount
    name: airflow
    namespace: airflow
Deploy PostgreSQL and RBAC configuration
Apply the PostgreSQL deployment and RBAC configuration to the Kubernetes cluster.
kubectl apply -f airflow-postgres.yaml
kubectl apply -f airflow-rbac.yaml
kubectl wait --for=condition=ready pod -l app=postgres -n airflow --timeout=300s
Create Airflow configuration
Configure Airflow to use the Kubernetes Executor with the PostgreSQL backend and proper resource limits. Save the manifest below as airflow-config.yaml.
# Airflow configuration for the Kubernetes Executor, using Airflow 2.x option
# and section names (the pre-2.x names in older tutorials are deprecated).
apiVersion: v1
kind: ConfigMap
metadata:
  name: airflow-config
  namespace: airflow
data:
  airflow.cfg: |
    [core]
    executor = KubernetesExecutor
    load_examples = False
    dags_are_paused_at_creation = False
    parallelism = 64
    # Airflow 2.2+ name for the old "dag_concurrency" option.
    max_active_tasks_per_dag = 32
    max_active_runs_per_dag = 16

    [database]
    # Since Airflow 2.3 the DB connection lives in [database], not [core].
    sql_alchemy_conn = postgresql+psycopg2://airflow:airflowpassword@postgres-service.airflow.svc.cluster.local:5432/airflow

    [webserver]
    base_url = http://localhost:8080
    web_server_port = 8080
    workers = 4

    [scheduler]
    dag_dir_list_interval = 60
    catchup_by_default = False
    max_tis_per_query = 512

    [kubernetes_executor]
    # Renamed from [kubernetes] in Airflow 2.5. pod_template_file is an
    # option of this section; there is no separate [kubernetes_pod_template]
    # section. Worker service account and image pull policy are taken from
    # the pod template file below.
    namespace = airflow
    worker_container_repository = apache/airflow
    worker_container_tag = 2.10.2-python3.11
    delete_worker_pods = True
    delete_worker_pods_on_failure = False
    worker_pods_creation_batch_size = 1
    pod_template_file = /opt/airflow/pod_template.yaml
Create Airflow pod template
Define the pod template for worker pods with resource limits and security context. Save the manifest below as pod-template.yaml.
# Worker pod template the Kubernetes Executor instantiates for every task.
apiVersion: v1
kind: ConfigMap
metadata:
  name: airflow-pod-template
  namespace: airflow
data:
  pod_template.yaml: |
    apiVersion: v1
    kind: Pod
    metadata:
      name: airflow-worker-template
      namespace: airflow
    spec:
      serviceAccountName: airflow
      restartPolicy: Never
      containers:
        - name: base
          image: apache/airflow:2.10.2-python3.11
          env:
            # Inside the worker pod the task runs locally; only the
            # scheduler uses the KubernetesExecutor.
            - name: AIRFLOW__CORE__EXECUTOR
              value: "LocalExecutor"
            - name: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN
              value: "postgresql+psycopg2://airflow:airflowpassword@postgres-service.airflow.svc.cluster.local:5432/airflow"
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
      # Run as the non-root "airflow" user baked into the official image.
      securityContext:
        runAsUser: 50000
        runAsGroup: 50000
Deploy Airflow webserver
Deploy the Airflow webserver with the Kubernetes Executor configuration. Save the manifest below as airflow-webserver.yaml.
# Airflow webserver Deployment plus a LoadBalancer Service exposing the UI.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: airflow-webserver
  namespace: airflow
spec:
  replicas: 1
  selector:
    matchLabels:
      app: airflow-webserver
  template:
    metadata:
      labels:
        app: airflow-webserver
    spec:
      serviceAccountName: airflow
      initContainers:
        # Runs the metadata-DB migrations before the webserver starts.
        - name: airflow-init
          image: apache/airflow:2.10.2-python3.11
          # "airflow db migrate" replaces the deprecated "airflow db init"
          # (Airflow 2.7+) and is idempotent, so restarts are safe.
          command: ["airflow", "db", "migrate"]
          env:
            - name: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN
              value: "postgresql+psycopg2://airflow:airflowpassword@postgres-service.airflow.svc.cluster.local:5432/airflow"
          volumeMounts:
            - name: airflow-config
              mountPath: /opt/airflow/airflow.cfg
              subPath: airflow.cfg
      containers:
        - name: airflow-webserver
          image: apache/airflow:2.10.2-python3.11
          command: ["airflow", "webserver"]
          ports:
            - containerPort: 8080
          env:
            - name: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN
              value: "postgresql+psycopg2://airflow:airflowpassword@postgres-service.airflow.svc.cluster.local:5432/airflow"
          volumeMounts:
            - name: airflow-config
              mountPath: /opt/airflow/airflow.cfg
              subPath: airflow.cfg
            - name: pod-template
              mountPath: /opt/airflow/pod_template.yaml
              subPath: pod_template.yaml
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "4Gi"
              cpu: "2000m"
      volumes:
        - name: airflow-config
          configMap:
            name: airflow-config
        - name: pod-template
          configMap:
            name: airflow-pod-template
---
apiVersion: v1
kind: Service
metadata:
  name: airflow-webserver-service
  namespace: airflow
  # Label the Service object itself so label-based monitors (ServiceMonitor)
  # can select it; pod selection below is unaffected.
  labels:
    app: airflow-webserver
spec:
  selector:
    app: airflow-webserver
  ports:
    # Named port so monitoring configs can reference "web" instead of 8080.
    - name: web
      port: 8080
      targetPort: 8080
  type: LoadBalancer
Deploy Airflow scheduler
Deploy the Airflow scheduler that will create Kubernetes pods for task execution. Save the manifest below as airflow-scheduler.yaml.
# Airflow scheduler Deployment: runs the KubernetesExecutor, which launches
# one worker pod per task using the mounted pod template.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: airflow-scheduler
  namespace: airflow
spec:
  replicas: 1
  selector:
    matchLabels:
      app: airflow-scheduler
  template:
    metadata:
      labels:
        app: airflow-scheduler
    spec:
      serviceAccountName: airflow
      containers:
        - name: airflow-scheduler
          image: apache/airflow:2.10.2-python3.11
          command: ["airflow", "scheduler"]
          env:
            - name: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN
              value: "postgresql+psycopg2://airflow:airflowpassword@postgres-service.airflow.svc.cluster.local:5432/airflow"
          volumeMounts:
            - name: airflow-config
              mountPath: /opt/airflow/airflow.cfg
              subPath: airflow.cfg
            - name: pod-template
              mountPath: /opt/airflow/pod_template.yaml
              subPath: pod_template.yaml
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "4Gi"
              cpu: "2000m"
      volumes:
        - name: airflow-config
          configMap:
            name: airflow-config
        - name: pod-template
          configMap:
            name: airflow-pod-template
Deploy all Airflow components
Apply all Airflow configurations and wait for the components to be ready.
kubectl apply -f airflow-config.yaml
kubectl apply -f pod-template.yaml
kubectl apply -f airflow-webserver.yaml
kubectl apply -f airflow-scheduler.yaml
kubectl wait --for=condition=ready pod -l app=airflow-webserver -n airflow --timeout=600s
kubectl wait --for=condition=ready pod -l app=airflow-scheduler -n airflow --timeout=600s
Configure horizontal pod autoscaler
Set up horizontal pod autoscaling for the Airflow webserver based on CPU and memory usage. Save the manifest below as airflow-hpa.yaml; it requires a working metrics-server, installed next.
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
# Autoscale the webserver between 1 and 5 replicas on CPU/memory utilization.
# Requires a working metrics-server.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: airflow-webserver-hpa
  namespace: airflow
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: airflow-webserver
  minReplicas: 1
  maxReplicas: 5
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
kubectl apply -f airflow-hpa.yaml
Install Prometheus for monitoring
Deploy Prometheus to monitor Airflow metrics and Kubernetes cluster performance.
kubectl create namespace monitoring
# Use "create" rather than "apply": the operator bundle's CRDs are larger
# than the last-applied-configuration annotation that client-side
# "kubectl apply" tries to write, so apply fails on them.
kubectl create -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/main/bundle.yaml
# ServiceMonitor asking the Prometheus Operator to scrape the Airflow
# webserver Service.
# NOTE(review): Airflow 2.x emits metrics via StatsD rather than serving a
# native Prometheus endpoint; /admin/metrics only exists if a plugin provides
# it. For production, run a statsd-exporter and point the monitor at it —
# verify before relying on this scrape target.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: airflow-metrics
  namespace: monitoring
spec:
  # Matches labels on the Service object itself (not the pods); the
  # airflow-webserver-service must carry the app: airflow-webserver label.
  selector:
    matchLabels:
      app: airflow-webserver
  endpoints:
    # "web" must be a named port on the Service.
    - port: web
      path: /admin/metrics
      interval: 30s
  namespaceSelector:
    matchNames:
      - airflow
Create admin user
Create an admin user for accessing the Airflow web interface. Replace the example password below with a strong, unique one before exposing the webserver beyond your workstation.
kubectl exec -it deployment/airflow-webserver -n airflow -- airflow users create \
--username admin \
--firstname Admin \
--lastname User \
--role Admin \
--email admin@example.com \
--password admin123
Verify your setup
Check that all Airflow components are running and the web interface is accessible.
kubectl get pods -n airflow
kubectl get services -n airflow
kubectl logs deployment/airflow-scheduler -n airflow --tail=50
kubectl port-forward service/airflow-webserver-service 8080:8080 -n airflow
Open your browser to http://localhost:8080 and log in with username admin and password admin123. You can find more details on configuring PostgreSQL streaming replication in our PostgreSQL high availability tutorial.
Configure monitoring and alerting
Enable Airflow metrics endpoint
Configure Airflow to emit metrics for Prometheus scraping. Note for Airflow 2.x: in the patched configuration below, sql_alchemy_conn belongs in a [database] section, the [kubernetes] section is named [kubernetes_executor], and statsd_host must point at a reachable statsd-exporter service (Airflow emits StatsD, not native Prometheus metrics), not localhost.
kubectl patch configmap airflow-config -n airflow --type merge -p '{
"data": {
"airflow.cfg": "[core]\nexecutor = KubernetesExecutor\nsql_alchemy_conn = postgresql+psycopg2://airflow:airflowpassword@postgres-service.airflow.svc.cluster.local:5432/airflow\nload_examples = False\ndags_are_paused_at_creation = False\nparallelism = 64\ndag_concurrency = 32\nmax_active_runs_per_dag = 16\n\n[webserver]\nbase_url = http://localhost:8080\nweb_server_port = 8080\nworkers = 4\nworker_refresh_batch_size = 1\nworker_refresh_interval = 6000\nexpose_config = True\n\n[scheduler]\ndag_dir_list_interval = 60\ncatchup_by_default = False\nmax_tis_per_query = 512\n\n[metrics]\nstatsd_on = True\nstatsd_host = localhost\nstatsd_port = 8125\nstatsd_prefix = airflow\n\n[kubernetes]\nnamespace = airflow\nairflow_configmap = airflow-config\nworker_container_repository = apache/airflow\nworker_container_tag = 2.10.2-python3.11\nworker_service_account_name = airflow\ndelete_worker_pods = True\ndelete_worker_pods_on_failure = False\nworker_pods_creation_batch_size = 1\nworker_container_image_pull_policy = IfNotPresent\n\n[kubernetes_pod_template]\npod_template_file = /opt/airflow/pod_template.yaml"
}
}'
For comprehensive monitoring setup, refer to our Airflow Prometheus monitoring tutorial and Prometheus and Grafana monitoring stack guide.
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Pods stuck in Pending state | Insufficient cluster resources | kubectl describe node to check resources, scale nodes |
| Worker pods fail to start | RBAC permissions missing | Verify service account and cluster role binding exist |
| Database connection errors | PostgreSQL not ready | kubectl logs deployment/postgres -n airflow to check database |
| Scheduler not creating worker pods | Pod template configuration error | Check pod template syntax with kubectl apply --dry-run=client |
| High memory usage on workers | Resource limits too high | Adjust memory requests/limits in pod template |
| Tasks failing with permission errors | Security context restrictions | Review and adjust runAsUser in pod template |
Next steps
- Implement Kubernetes network policies with Calico and OPA Gatekeeper for security enforcement
- Configure Airflow DAG deployment with Git sync for CI/CD integration
- Setup Airflow data lineage tracking with OpenLineage for data governance
- Implement Airflow disaster recovery with automated backup strategies
- Configure Airflow multi-cluster deployment with Kubernetes federation
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
set -euo pipefail

# ANSI color codes for status output; readonly because they are constants.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Global variables. Assignment is split from readonly so a failing command
# substitution is not masked by the declaration's exit status.
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
TEMP_FILES=()                      # populated at runtime; removed on failure
readonly KUBERNETES_VERSION="v1.31"
# Cleanup function: runs on every exit via trap. On failure it removes any
# temp files recorded in TEMP_FILES and stops services this script started;
# on success it just propagates the exit code.
cleanup() {
  local exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo -e "${RED}[ERROR] Installation failed. Cleaning up...${NC}"
    # The ${arr[@]+...} guard avoids tripping 'set -u' on bash < 4.4,
    # where expanding an empty array counts as an unbound variable.
    for file in ${TEMP_FILES[@]+"${TEMP_FILES[@]}"}; do
      [ -f "$file" ] && rm -f "$file"
    done
    # Stop services if they were started
    systemctl is-active --quiet docker && systemctl stop docker || true
    systemctl is-active --quiet kubelet && systemctl stop kubelet || true
  fi
  exit $exit_code
}
trap cleanup ERR EXIT
# Print command-line help to stdout.
usage() {
  cat <<HELP_EOF
Usage: $SCRIPT_NAME [OPTIONS]
Install Apache Airflow with Kubernetes Executor

Options:
  -h, --help    Show this help message
  --pod-subnet  Pod subnet CIDR (default: 10.244.0.0/16)

Example: $SCRIPT_NAME --pod-subnet 192.168.0.0/16
HELP_EOF
}
# Default values
POD_SUBNET="10.244.0.0/16"

# Parse command-line arguments.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --pod-subnet)
      # Fail with a clear message when the value is missing instead of
      # letting 'set -u' abort on the unbound $2.
      if [[ $# -lt 2 ]]; then
        echo -e "${RED}--pod-subnet requires a value${NC}"
        usage
        exit 1
      fi
      POD_SUBNET="$2"
      shift 2
      ;;
    *)
      echo -e "${RED}Unknown option: $1${NC}"
      usage
      exit 1
      ;;
  esac
done
# Everything below writes to system paths and manages services: refuse to
# continue without root privileges.
if (( EUID != 0 )); then
  echo -e "${RED}[ERROR] This script must be run as root or with sudo${NC}"
  exit 1
fi
# Detect distribution
echo -e "${BLUE}[1/7] Detecting operating system...${NC}"
# /etc/os-release exists on all systemd-era distros and defines $ID
# (machine-readable name) and $PRETTY_NAME (human-readable).
if [ -f /etc/os-release ]; then
. /etc/os-release
# Map the distro family to its package manager commands and to the
# repo-setup helper functions defined below; later steps use only these.
case "$ID" in
ubuntu|debian)
PKG_MGR="apt"
PKG_UPDATE="apt update && apt upgrade -y"
PKG_INSTALL="apt install -y"
DOCKER_REPO_SETUP="setup_docker_apt"
K8S_REPO_SETUP="setup_k8s_apt"
;;
almalinux|rocky|centos|rhel|ol|fedora)
PKG_MGR="dnf"
PKG_UPDATE="dnf update -y"
PKG_INSTALL="dnf install -y"
DOCKER_REPO_SETUP="setup_docker_dnf"
K8S_REPO_SETUP="setup_k8s_dnf"
;;
# Amazon Linux: yum-based and uses the RPM flavour of the k8s repo.
amzn)
PKG_MGR="yum"
PKG_UPDATE="yum update -y"
PKG_INSTALL="yum install -y"
DOCKER_REPO_SETUP="setup_docker_dnf"
K8S_REPO_SETUP="setup_k8s_yum"
;;
*)
echo -e "${RED}[ERROR] Unsupported distribution: $ID${NC}"
exit 1
;;
esac
echo -e "${GREEN}Detected: $PRETTY_NAME${NC}"
else
echo -e "${RED}[ERROR] Cannot detect distribution${NC}"
exit 1
fi
# Setup Docker repository for APT-based systems.
# Globals read: ID (from /etc/os-release).
setup_docker_apt() {
  local keyring="/usr/share/keyrings/docker-archive-keyring.gpg"
  local codename
  if command -v lsb_release >/dev/null 2>&1; then
    codename=$(lsb_release -cs)
  else
    # Fallback for minimal images without lsb-release installed:
    # os-release carries the same suite name as VERSION_CODENAME.
    codename=$(. /etc/os-release && echo "${VERSION_CODENAME:-}")
  fi
  curl -fsSL "https://download.docker.com/linux/$ID/gpg" | gpg --dearmor -o "$keyring"
  chmod 644 "$keyring"
  echo "deb [arch=amd64 signed-by=$keyring] https://download.docker.com/linux/$ID $codename stable" > /etc/apt/sources.list.d/docker.list
  chmod 644 /etc/apt/sources.list.d/docker.list
  apt update
}
# Setup Docker repository for DNF/YUM-based systems.
# Uses the CentOS repo for all RHEL-compatible distros, Fedora's for Fedora.
setup_docker_dnf() {
  local repo_url="https://download.docker.com/linux/centos/docker-ce.repo"
  if [[ "$ID" == "fedora" ]]; then
    repo_url="https://download.docker.com/linux/fedora/docker-ce.repo"
  fi
  # config-manager needs dnf-plugins-core; fall back to fetching the repo
  # file directly when it is unavailable.
  if ! $PKG_MGR config-manager --add-repo "$repo_url"; then
    curl -fsSL "$repo_url" -o /etc/yum.repos.d/docker-ce.repo
    chmod 644 /etc/yum.repos.d/docker-ce.repo
  fi
}
# Setup Kubernetes repository for APT-based systems.
# Globals read: KUBERNETES_VERSION.
setup_k8s_apt() {
  local keyring="/etc/apt/keyrings/kubernetes-apt-keyring.gpg"
  local list="/etc/apt/sources.list.d/kubernetes.list"
  mkdir -p /etc/apt/keyrings
  curl -fsSL "https://pkgs.k8s.io/core:/stable:/$KUBERNETES_VERSION/deb/Release.key" | gpg --dearmor -o "$keyring"
  chmod 644 "$keyring"
  printf 'deb [signed-by=%s] https://pkgs.k8s.io/core:/stable:/%s/deb/ /\n' \
    "$keyring" "$KUBERNETES_VERSION" > "$list"
  chmod 644 "$list"
  apt update
}
# Setup Kubernetes repository for DNF/YUM-based systems.
# Writes the pkgs.k8s.io repo definition with GPG checking enabled.
setup_k8s_yum() {
  local repo_file="/etc/yum.repos.d/kubernetes.repo"
  printf '%s\n' \
    "[kubernetes]" \
    "name=Kubernetes" \
    "baseurl=https://pkgs.k8s.io/core:/stable:/$KUBERNETES_VERSION/rpm/" \
    "enabled=1" \
    "gpgcheck=1" \
    "gpgkey=https://pkgs.k8s.io/core:/stable:/$KUBERNETES_VERSION/rpm/repodata/repomd.xml.key" \
    > "$repo_file"
  chmod 644 "$repo_file"
}
# DNF systems consume the same repo file format as YUM, so just delegate.
setup_k8s_dnf() {
setup_k8s_yum
}
# Update system packages
echo -e "${BLUE}[2/7] Updating system packages...${NC}"
# eval is needed because PKG_UPDATE may be a compound command
# ("apt update && apt upgrade -y"); the string is script-defined, not user input.
eval "$PKG_UPDATE"
# Install base dependencies
# (apt needs lsb-release/transport helpers; rpm distros need yum-utils for
# config-manager used by the Docker repo setup.)
echo -e "${BLUE}[3/7] Installing base dependencies...${NC}"
if [[ "$PKG_MGR" == "apt" ]]; then
$PKG_INSTALL curl wget gnupg software-properties-common apt-transport-https ca-certificates lsb-release
else
$PKG_INSTALL curl wget gnupg2 yum-utils ca-certificates
fi
# Install Docker
echo -e "${BLUE}[4/7] Installing Docker...${NC}"
$DOCKER_REPO_SETUP
$PKG_INSTALL docker-ce docker-ce-cli containerd.io

# Configure Docker daemon: systemd cgroup driver (matches kubelet) and
# bounded json-file logging.
mkdir -p /etc/docker
cat > /etc/docker/daemon.json << EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2"
}
EOF
chmod 644 /etc/docker/daemon.json

# The containerd.io package ships /etc/containerd/config.toml with the CRI
# plugin disabled, which makes the later "kubeadm init --cri-socket
# containerd" fail. Regenerate a default config with the systemd cgroup
# driver enabled and restart containerd to pick it up.
containerd config default > /etc/containerd/config.toml
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml

# Start and enable Docker
systemctl daemon-reload
systemctl enable docker
systemctl start docker
systemctl restart containerd

# Add current user to docker group if not root
if [[ -n "${SUDO_USER:-}" ]]; then
  usermod -aG docker "$SUDO_USER"
  echo -e "${YELLOW}[INFO] User $SUDO_USER added to docker group. Please log out and back in for changes to take effect.${NC}"
fi
# Install Kubernetes components
echo -e "${BLUE}[5/7] Installing Kubernetes components...${NC}"
$K8S_REPO_SETUP
$PKG_INSTALL kubelet kubeadm kubectl
# Hold Kubernetes packages on APT systems
# (prevents unattended upgrades from skewing cluster component versions)
if [[ "$PKG_MGR" == "apt" ]]; then
apt-mark hold kubelet kubeadm kubectl
fi
# Configure kubelet
systemctl enable kubelet
# Disable swap (required for Kubernetes)
# kubelet refuses to start with swap enabled unless explicitly configured.
swapoff -a
# Comment out swap entries so the setting survives reboots.
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
# Configure kernel modules and sysctl
# br_netfilter plus the sysctls below let iptables see bridged pod traffic;
# overlay is the filesystem module used by the container runtime.
cat > /etc/modules-load.d/k8s.conf << EOF
br_netfilter
overlay
EOF
chmod 644 /etc/modules-load.d/k8s.conf
modprobe br_netfilter
modprobe overlay
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
chmod 644 /etc/sysctl.d/k8s.conf
sysctl --system
# Initialize Kubernetes cluster
echo -e "${BLUE}[6/7] Initializing Kubernetes cluster...${NC}"
kubeadm init --pod-network-cidr="$POD_SUBNET" --cri-socket unix:///var/run/containerd/containerd.sock

# Setup kubectl for root
mkdir -p /root/.kube
cp -f /etc/kubernetes/admin.conf /root/.kube/config
chmod 600 /root/.kube/config

# Setup kubectl for sudo user if exists
if [[ -n "${SUDO_USER:-}" ]]; then
  # getent resolves the home directory without eval'ing shell-expanded input.
  SUDO_HOME=$(getent passwd "$SUDO_USER" | cut -d: -f6)
  mkdir -p "$SUDO_HOME/.kube"
  cp -f /etc/kubernetes/admin.conf "$SUDO_HOME/.kube/config"
  chown -R "$SUDO_USER:$SUDO_USER" "$SUDO_HOME/.kube"
  chmod 600 "$SUDO_HOME/.kube/config"
fi

# Allow scheduling pods on control plane (for single-node setup)
kubectl taint nodes --all node-role.kubernetes.io/control-plane- || true

# Install CNI plugin (Flannel). The old coreos/flannel repository is archived;
# the manifest now lives under the flannel-io organization. Flannel's default
# pod CIDR is 10.244.0.0/16 — if --pod-subnet was changed, edit the manifest
# to match before applying.
kubectl apply -f https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
# Verification
echo -e "${BLUE}[7/7] Running verification checks...${NC}"

# Check Docker
if systemctl is-active --quiet docker; then
  echo -e "${GREEN}✓ Docker is running${NC}"
else
  echo -e "${RED}✗ Docker is not running${NC}"
  exit 1
fi

# Check kubelet
if systemctl is-active --quiet kubelet; then
  echo -e "${GREEN}✓ Kubelet is running${NC}"
else
  echo -e "${RED}✗ Kubelet is not running${NC}"
  exit 1
fi

# Wait for nodes to be ready
echo -e "${YELLOW}[INFO] Waiting for cluster to be ready...${NC}"
timeout=300
# Compare the STATUS column exactly: a plain substring match for "Ready"
# also matches nodes reporting "NotReady".
while ! kubectl get nodes --no-headers | awk '{print $2}' | grep -qx "Ready"; do
  sleep 5
  timeout=$((timeout - 5))
  if [[ $timeout -le 0 ]]; then
    echo -e "${RED}[ERROR] Cluster failed to become ready within 5 minutes${NC}"
    exit 1
  fi
done

# Display cluster info
echo -e "${GREEN}✓ Kubernetes cluster is ready${NC}"
kubectl get nodes
kubectl get pods --all-namespaces
# Final summary printed after all steps succeeded.
echo ""
echo -e "${GREEN}[SUCCESS] Apache Airflow Kubernetes Executor setup completed!${NC}"
echo ""
echo -e "${BLUE}Next steps:${NC}"
echo "1. Deploy Airflow using Helm charts with Kubernetes Executor"
echo "2. Configure your DAGs and connections"
echo "3. Set up persistent storage for Airflow metadata"
echo ""
echo -e "${YELLOW}Cluster Information:${NC}"
echo "Pod network CIDR: $POD_SUBNET"
echo "Kubectl config: /root/.kube/config"
# Only mention the user kubeconfig if the script copied one for SUDO_USER.
if [[ -n "${SUDO_USER:-}" ]]; then
echo "User kubectl config: ~$SUDO_USER/.kube/config"
fi
Review the script before running. Execute with: bash install.sh