Configure DuckDB with advanced partitioning strategies and memory optimization for processing multi-gigabyte datasets efficiently. Includes Python integration, query optimization techniques, and comprehensive monitoring setup.
Prerequisites
- Root access or sudo privileges
- At least 8GB RAM available
- Python 3.8 or higher
- 50GB free disk space for testing
What this solves
DuckDB excels at analytical workloads but requires careful configuration for datasets larger than system memory. This tutorial shows you how to implement table partitioning, configure memory management, and optimize query performance for datasets ranging from gigabytes to terabytes. You'll also set up monitoring to track performance metrics and identify bottlenecks.
Step-by-step configuration
Install DuckDB with Python integration
Start by installing DuckDB and the Python client for programmatic access and advanced configuration options.
sudo apt update
sudo apt install -y python3-pip python3-dev build-essential
pip3 install duckdb==0.9.2 pandas pyarrow
Configure system memory limits
Set memory limits for DuckDB processes to prevent system overload during large dataset operations.
# DuckDB memory limits
duckdb soft as 8388608
duckdb hard as 16777216
duckdb soft memlock 4194304
duckdb hard memlock 8388608
Create DuckDB configuration file
Configure memory allocation, temporary storage location, and threading parameters for optimal performance.
mkdir -p ~/.config/duckdb
sudo mkdir -p /var/lib/duckdb/temp
sudo chown $USER:$USER /var/lib/duckdb/temp
-- Memory configuration
SET memory_limit = '4GB';
SET temp_directory = '/var/lib/duckdb/temp';
SET threads = 4;
-- Performance settings
SET enable_progress_bar = true;
SET checkpoint_threshold = '16MB';
SET wal_autocheckpoint = 1000;
-- Optimization settings
SET enable_optimizer = true;
SET enable_profiling = true;
Set up partitioned table structure
Create a Python script to implement range and hash partitioning strategies for large datasets.
#!/usr/bin/env python3
import duckdb
import pandas as pd
from datetime import datetime, timedelta
def create_partitioned_tables(db_path='/var/lib/duckdb/analytics.db'):
    """Create the ``sales_data`` and ``customer_data`` tables used by this guide.

    PostgreSQL-style declarative partitioning (``PARTITION BY RANGE``/``HASH``
    on ``CREATE TABLE`` and ``CREATE TABLE ... PARTITION OF``) is deliberately
    not used: DuckDB's ``CREATE TABLE`` has no such clauses, so those
    statements fail at parse time. Both tables are created as ordinary tables
    with the same columns, which keeps every later query in this guide valid.

    When data must be physically partitioned, export it as Hive-partitioned
    Parquet instead, for example::

        COPY sales_data TO 'sales' (FORMAT PARQUET, PARTITION_BY (region));

    Args:
        db_path: Path of the DuckDB database file to open (created if missing).

    Raises:
        duckdb.Error: If the database cannot be opened or a statement fails.
    """
    conn = duckdb.connect(db_path)
    try:
        # Fact table: filtered by transaction_date in the later examples.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS sales_data (
                id INTEGER,
                transaction_date DATE,
                customer_id INTEGER,
                product_id INTEGER,
                amount DECIMAL(10,2),
                region VARCHAR(50)
            );
        """)

        # Dimension table: joined to sales_data on customer_id.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS customer_data (
                customer_id INTEGER,
                name VARCHAR(100),
                email VARCHAR(100),
                created_at TIMESTAMP,
                lifetime_value DECIMAL(12,2)
            );
        """)
    finally:
        # Release the database file lock even when a statement fails.
        conn.close()

    print("Tables created successfully")
if __name__ == "__main__":
create_partitioned_tables()
sudo mkdir -p /opt/duckdb
sudo cp partition_setup.py /opt/duckdb/
sudo chmod +x /opt/duckdb/partition_setup.py
python3 /opt/duckdb/partition_setup.py
Configure memory management and query optimization
Create advanced configuration for memory allocation, parallel processing, and query optimization.
#!/usr/bin/env python3
import duckdb
import psutil
def configure_performance_settings():
    """Derive memory/thread settings from the host and apply them to DuckDB.

    60% of the currently available RAM (at least 1GB) becomes ``memory_limit``
    and the physical core count (capped at 8) becomes ``threads``; a failure
    to apply either of these is raised. The remaining tuning statements are
    applied best-effort: any statement the installed DuckDB version rejects is
    reported and skipped instead of aborting the whole run.

    NOTE(review): these are ``SET`` statements on a connection that is closed
    at the end of this function; they are presumably not stored in the
    database file, so other processes would need to apply them again --
    confirm against the DuckDB configuration documentation.

    Raises:
        duckdb.Error: If the database cannot be opened or the memory/thread
            settings are rejected.
    """
    conn = duckdb.connect('/var/lib/duckdb/analytics.db')
    try:
        available_memory = psutil.virtual_memory().available

        # cpu_count(logical=False) returns None when the physical core count
        # cannot be determined; fall back to logical cores, then to 1.
        cpu_count = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
        thread_count = min(cpu_count, 8)

        # Allocate 60% of available memory, but never round down to '0GB'.
        memory_limit = int(available_memory * 0.6)
        memory_limit_gb = max(memory_limit // (1024**3), 1)
        buffer_size = min(memory_limit_gb // 4, 2)  # Max 2GB buffer

        # Core settings: failing to apply these should stop the script.
        conn.execute(f"SET memory_limit = '{memory_limit_gb}GB';")
        conn.execute(f"SET threads = {thread_count};")

        optional_settings = [
            "SET enable_optimizer = true;",
            "SET optimizer_join_order = true;",
            "SET enable_http_metadata_cache = true;",
        ]
        if buffer_size > 0:
            optional_settings.append(f"SET buffer_size = '{buffer_size}GB';")
        optional_settings += [
            "SET temp_directory = '/var/lib/duckdb/temp';",
            "SET max_temp_directory_size = '10GB';",
            "SET enable_object_cache = true;",
            "SET preserve_insertion_order = false;",
        ]

        skipped = []
        for statement in optional_settings:
            try:
                conn.execute(statement)
            except duckdb.Error as exc:
                skipped.append(statement)
                print(f"Skipped setting not accepted by this DuckDB version: {statement} ({exc})")

        print(f"Configured DuckDB with {memory_limit_gb}GB memory, {thread_count} threads")
        print(f"Buffer size: {buffer_size}GB, CPU cores: {cpu_count}")
        if skipped:
            print(f"{len(skipped)} optional setting(s) were skipped")
    finally:
        conn.close()
if __name__ == "__main__":
configure_performance_settings()
pip3 install psutil
python3 /opt/duckdb/optimization_config.py
Implement partition pruning and query optimization
Create scripts to demonstrate efficient querying techniques and partition elimination strategies.
#!/usr/bin/env python3
import duckdb
import time
from datetime import datetime, timedelta
def create_optimized_indexes():
    """Create the secondary indexes used by the example queries.

    Three indexes are created if they do not already exist:
    ``idx_sales_date_region`` on ``sales_data``, and ``idx_customer_created``
    and ``idx_customer_value`` on ``customer_data``. The connection is closed
    even if one of the statements fails.

    Raises:
        duckdb.Error: If the database cannot be opened or a statement fails.
    """
    index_statements = (
        """
        CREATE INDEX IF NOT EXISTS idx_sales_date_region
        ON sales_data (transaction_date, region);
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_customer_created
        ON customer_data (created_at);
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_customer_value
        ON customer_data (lifetime_value DESC);
        """,
    )

    conn = duckdb.connect('/var/lib/duckdb/analytics.db')
    try:
        for statement in index_statements:
            conn.execute(statement)
    finally:
        conn.close()

    print("Indexes created for partition pruning")
def demonstrate_partition_pruning():
    """Run the example queries, time them, and print an analyzed query plan.

    Executes a date-range aggregate on ``sales_data`` and a modulus-filtered
    aggregate on ``customer_data``, printing how long each took, then prints
    the ``EXPLAIN ANALYZE`` output for a date-filtered scan. The connection is
    closed even if a query fails.

    Raises:
        duckdb.Error: If the database cannot be opened or a query fails.
    """
    conn = duckdb.connect('/var/lib/duckdb/analytics.db')
    try:
        # Enable query profiling
        conn.execute("PRAGMA enable_profiling = 'query_tree';")

        # Example 1: date range query restricted to a two-month window.
        # perf_counter() is monotonic, so the duration cannot go negative if
        # the system clock is adjusted mid-query.
        started = time.perf_counter()
        result = conn.execute("""
            SELECT region, COUNT(*), SUM(amount)
            FROM sales_data
            WHERE transaction_date >= '2024-01-01'
            AND transaction_date < '2024-03-01'
            GROUP BY region
            ORDER BY SUM(amount) DESC;
        """).fetchall()
        query_time = time.perf_counter() - started
        print(f"Date range query completed in {query_time:.2f} seconds")
        print(f"Results: {len(result)} regions found")

        # Example 2: query restricted to one customer_id modulus bucket.
        started = time.perf_counter()
        result = conn.execute("""
            SELECT COUNT(*), AVG(lifetime_value)
            FROM customer_data
            WHERE customer_id % 8 = 3
            AND lifetime_value > 1000;
        """).fetchall()
        query_time = time.perf_counter() - started
        print(f"Hash partition query completed in {query_time:.2f} seconds")

        # Show query plan for optimization analysis
        plan = conn.execute(
            "EXPLAIN ANALYZE SELECT * FROM sales_data WHERE transaction_date >= '2024-01-01' LIMIT 10;"
        ).fetchall()
        print("\nQuery execution plan:")
        for row in plan:
            # NOTE(review): DuckDB returns EXPLAIN rows as (key, plan_text);
            # the last column is printed so the plan itself is shown rather
            # than only the key -- confirm against the installed version.
            print(row[-1])
    finally:
        conn.close()
if __name__ == "__main__":
create_optimized_indexes()
demonstrate_partition_pruning()
python3 /opt/duckdb/query_optimizer.py
Set up performance monitoring
Install monitoring tools to track DuckDB performance metrics and resource usage.
sudo apt install -y prometheus-node-exporter
pip3 install prometheus-client flask
Create DuckDB metrics exporter
Build a custom metrics exporter to track query performance, memory usage, and partition statistics.
#!/usr/bin/env python3
import duckdb
import time
import psutil
from prometheus_client import start_http_server, Gauge, Counter, Histogram
from threading import Thread
import logging
# Prometheus metrics
query_duration = Histogram('duckdb_query_duration_seconds', 'Query execution time')
active_connections = Gauge('duckdb_active_connections', 'Number of active connections')
memory_usage = Gauge('duckdb_memory_usage_bytes', 'Memory usage in bytes')
partitions_scanned = Counter('duckdb_partitions_scanned_total', 'Total partitions scanned')
rows_processed = Counter('duckdb_rows_processed_total', 'Total rows processed')
class DuckDBMonitor:
    """Periodically sample host and DuckDB statistics into Prometheus gauges.

    ``start()`` launches a background collection thread, serves the metrics
    over HTTP on port 8000, and blocks until interrupted.
    """

    def __init__(self, db_path):
        """Remember the database path and mark the monitor as running.

        Args:
            db_path: Path of the DuckDB database file to inspect.
        """
        self.db_path = db_path
        # Polled by collect_metrics(); set to False to end the loop.
        self.running = True

    def _collect_once(self):
        """Take one sample, always closing the connection afterwards."""
        conn = duckdb.connect(self.db_path)
        try:
            # Collect system metrics (host-wide used memory, not per-process).
            memory_info = psutil.virtual_memory()
            memory_usage.set(memory_info.used)

            # Collect database statistics
            tables_info = conn.execute("""
                SELECT table_name, estimated_size
                FROM duckdb_tables()
                WHERE schema_name = 'main';
            """).fetchall()
            logging.debug("Found %d tables in schema 'main'", len(tables_info))

            # Check active queries (if available)
            try:
                active_queries = conn.execute("""
                    SELECT COUNT(*) FROM duckdb_queries()
                    WHERE state = 'RUNNING';
                """).fetchone()[0]
            except duckdb.Error:
                # Fallback if queries table not available
                active_queries = 1
            active_connections.set(active_queries)
        finally:
            # Without this, a failing query would leak the connection on
            # every collection cycle.
            conn.close()

    def collect_metrics(self):
        """Sample metrics every 30 seconds until ``self.running`` is False.

        Errors from a single sample are logged and do not stop the loop.
        """
        while self.running:
            try:
                self._collect_once()
            except Exception as e:
                logging.error(f"Error collecting metrics: {e}")
            time.sleep(30)  # Collect metrics every 30 seconds

    def start(self):
        """Start the collector thread and the HTTP endpoint, then block.

        Returns when the process receives ``KeyboardInterrupt``.
        """
        # Start metrics collection thread; daemon so it never blocks exit.
        metrics_thread = Thread(target=self.collect_metrics)
        metrics_thread.daemon = True
        metrics_thread.start()

        # Start Prometheus metrics server
        start_http_server(8000)
        print("DuckDB metrics exporter started on port 8000")

        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            self.running = False
            print("Shutting down metrics exporter")
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
monitor = DuckDBMonitor('/var/lib/duckdb/analytics.db')
monitor.start()
python3 /opt/duckdb/metrics_exporter.py &
Configure systemd service for DuckDB monitoring
Create a systemd service to ensure the metrics exporter runs automatically on system startup.
[Unit]
Description=DuckDB Performance Monitor
After=network.target
[Service]
Type=simple
User=duckdb
Group=duckdb
WorkingDirectory=/opt/duckdb
ExecStart=/usr/bin/python3 /opt/duckdb/metrics_exporter.py
Restart=always
RestartSec=10
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
Environment=PYTHONPATH=/opt/duckdb
[Install]
WantedBy=multi-user.target
sudo useradd -r -s /bin/false duckdb
sudo chown -R duckdb:duckdb /opt/duckdb /var/lib/duckdb
sudo systemctl daemon-reload
sudo systemctl enable --now duckdb-monitor
Set up automated performance benchmarking
Create a benchmarking script to regularly test query performance and partition efficiency.
#!/usr/bin/env python3
import duckdb
import time
import json
from datetime import datetime
import pandas as pd
def run_benchmark_suite():
    """Time the benchmark queries and append the results to a JSON-lines log.

    Each query runs three times; the mean duration and the row count of the
    final run are recorded, printed, and appended as one JSON object per line
    to ``/var/log/duckdb-benchmark.json``. The connection is closed even if a
    query or the log write fails.

    Returns:
        list[dict]: One record per query with the keys ``query_name``,
        ``avg_execution_time``, ``result_count`` and ``timestamp``.

    Raises:
        duckdb.Error: If the database cannot be opened or a query fails.
        OSError: If the log file cannot be opened for appending.
    """
    # Benchmark queries with different partition access patterns
    benchmark_queries = [
        {
            "name": "single_partition_scan",
            "query": """
                SELECT COUNT(*), AVG(amount)
                FROM sales_data
                WHERE transaction_date >= '2024-01-01'
                AND transaction_date < '2024-02-01'
            """
        },
        {
            "name": "multi_partition_scan",
            "query": """
                SELECT region, COUNT(*), SUM(amount)
                FROM sales_data
                WHERE transaction_date >= '2024-01-01'
                AND transaction_date < '2024-06-01'
                GROUP BY region
            """
        },
        {
            "name": "hash_partition_join",
            "query": """
                SELECT c.name, COUNT(s.id), SUM(s.amount)
                FROM customer_data c
                JOIN sales_data s ON c.customer_id = s.customer_id
                WHERE c.lifetime_value > 5000
                AND s.transaction_date >= '2024-01-01'
                GROUP BY c.name
                ORDER BY SUM(s.amount) DESC
                LIMIT 100
            """
        }
    ]
    runs_per_query = 3
    results = []

    conn = duckdb.connect('/var/lib/duckdb/analytics.db')
    try:
        # Enable profiling once; the setting does not change between queries.
        conn.execute("PRAGMA enable_profiling = 'query_tree';")

        for benchmark in benchmark_queries:
            # Run query multiple times and average
            times = []
            rows = []
            for _ in range(runs_per_query):
                # perf_counter() is monotonic, unlike time.time().
                started = time.perf_counter()
                rows = conn.execute(benchmark["query"]).fetchall()
                times.append(time.perf_counter() - started)

            avg_time = sum(times) / len(times)
            results.append({
                "query_name": benchmark["name"],
                "avg_execution_time": avg_time,
                "result_count": len(rows),
                "timestamp": datetime.now().isoformat()
            })
            print(f"{benchmark['name']}: {avg_time:.2f}s avg ({len(rows)} rows)")

        # Save results to file (JSON lines, appended so history accumulates)
        with open('/var/log/duckdb-benchmark.json', 'a') as f:
            f.writelines(json.dumps(record) + '\n' for record in results)
    finally:
        conn.close()

    return results
if __name__ == "__main__":
print("Starting DuckDB performance benchmark...")
results = run_benchmark_suite()
print(f"Benchmark completed. {len(results)} queries executed.")
sudo mkdir -p /var/log
sudo touch /var/log/duckdb-benchmark.json
sudo chown duckdb:duckdb /var/log/duckdb-benchmark.json
python3 /opt/duckdb/benchmark.py
Configure automated benchmark scheduling
Set up a cron job to run performance benchmarks regularly and track performance trends over time.
sudo crontab -u duckdb -e
# Run performance benchmark every 4 hours
0 */4 * * * /usr/bin/python3 /opt/duckdb/benchmark.py >> /var/log/duckdb-benchmark.log 2>&1
# Clean up old temp files daily
0 2 * * * find /var/lib/duckdb/temp -type f -mtime +7 -delete
# Rotate benchmark logs weekly
0 0 * * 0 logrotate -f /etc/logrotate.d/duckdb
/var/log/duckdb-benchmark.log {
weekly
rotate 12
compress
delaycompress
missingok
notifempty
copytruncate
su duckdb duckdb
}
Verify your setup
Test the partitioned database configuration and confirm optimal performance settings are active.
# Check DuckDB version and configuration
python3 -c "import duckdb; print(f'DuckDB version: {duckdb.__version__}')"
# Verify partitioned tables exist
python3 -c "
import duckdb
conn = duckdb.connect('/var/lib/duckdb/analytics.db')
result = conn.execute(\"SELECT table_name FROM duckdb_tables() WHERE schema_name = 'main';\").fetchall()
print('Tables:', [r[0] for r in result])
conn.close()"
# Check monitoring service status
sudo systemctl status duckdb-monitor
# Test metrics endpoint
curl -s http://localhost:8000/metrics | grep duckdb_
# Run a quick benchmark
python3 /opt/duckdb/benchmark.py
# Check memory configuration
free -h
cat /proc/meminfo | grep -E 'MemTotal|MemAvailable'
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Out of memory errors | Memory limit too high for system | Reduce memory_limit in config, check available RAM with free -h |
| Slow partition queries | Missing indexes or partition pruning not working | Check query plan with EXPLAIN ANALYZE, ensure WHERE clauses match partition keys |
| Temp directory full | Large query spilling to disk | Increase temp_directory size or clean old files with find /var/lib/duckdb/temp -mtime +1 -delete |
| Metrics exporter not starting | Permission issues or missing dependencies | Check service logs with journalctl -u duckdb-monitor, verify ownership of /opt/duckdb |
| Poor query performance | Suboptimal threading or buffer configuration | Run python3 /opt/duckdb/optimization_config.py to recalculate settings |
Next steps
- Configure DuckDB cluster setup for distributed analytics for multi-node deployments
- Setup DuckDB with Apache Airflow for automated data pipeline orchestration
- Set up Prometheus and Grafana monitoring to visualize DuckDB performance metrics
- Optimize DuckDB columnar storage for analytical workloads
- Configure DuckDB S3 integration for data lakes
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Configuration
DUCKDB_VERSION="0.9.2"
# Prefer the user who invoked sudo. The nested default keeps `set -u` from
# aborting when neither SUDO_USER nor USER is set.
DUCKDB_USER="${SUDO_USER:-${USER:-root}}"
DUCKDB_HOME="/var/lib/duckdb"
# Look up the real home directory in the passwd database instead of assuming
# /home/<user>, which is wrong for root and for non-standard home locations.
# `|| true` stops a failed lookup from aborting under `set -e -o pipefail`;
# the /home/<user> fallback is used in that case.
USER_HOME="$(getent passwd "$DUCKDB_USER" | cut -d: -f6 || true)"
CONFIG_DIR="${USER_HOME:-/home/$DUCKDB_USER}/.config/duckdb"
# Cleanup function
cleanup() {
echo -e "${RED}Installation failed. Cleaning up...${NC}"
rm -f /tmp/partition_setup.py
rm -f /tmp/monitoring_setup.py
exit 1
}
trap cleanup ERR
# Print the help text and exit.
#   $1 - exit status to use (default 1, so misuse is reported as a failure)
usage() {
    local status="${1:-1}"
    echo "Usage: $0 [OPTIONS]"
    echo "Options:"
    echo "  -m, --memory LIMIT     Set memory limit (default: 4GB)"
    echo "  -t, --threads COUNT    Set thread count (default: 4)"
    echo "  -h, --help             Show this help"
    exit "$status"
}
# Parse arguments
MEMORY_LIMIT="4GB"
THREAD_COUNT="4"
while [[ $# -gt 0 ]]; do
    case $1 in
        -m|--memory)
            # Without this check `set -u` aborts with an opaque
            # "unbound variable" error when the value is missing.
            if [[ $# -lt 2 ]]; then
                echo -e "${RED}Option $1 requires a value${NC}"
                usage
            fi
            MEMORY_LIMIT="$2"
            shift 2
            ;;
        -t|--threads)
            if [[ $# -lt 2 ]]; then
                echo -e "${RED}Option $1 requires a value${NC}"
                usage
            fi
            THREAD_COUNT="$2"
            shift 2
            ;;
        -h|--help)
            # Asking for help is not an error.
            usage 0
            ;;
        *)
            echo -e "${RED}Unknown option: $1${NC}"
            usage
            ;;
    esac
done
# The thread count is written verbatim into the SQL configuration file, so
# reject anything that is not a positive integer.
if ! [[ "$THREAD_COUNT" =~ ^[1-9][0-9]*$ ]]; then
    echo -e "${RED}Invalid thread count: $THREAD_COUNT${NC}"
    usage
fi
# Check if running as root or with sudo
if [[ $EUID -ne 0 ]]; then
echo -e "${RED}This script must be run as root or with sudo${NC}"
exit 1
fi
# Detect distribution
echo -e "${YELLOW}[1/8] Detecting operating system...${NC}"
if [ -f /etc/os-release ]; then
. /etc/os-release
case "$ID" in
ubuntu|debian)
PKG_MGR="apt"
PKG_INSTALL="apt install -y"
UPDATE_CMD="apt update"
PYTHON_DEV="python3-dev"
BUILD_TOOLS="build-essential"
;;
almalinux|rocky|centos|rhel|ol|fedora)
PKG_MGR="dnf"
PKG_INSTALL="dnf install -y"
UPDATE_CMD="dnf update -y"
PYTHON_DEV="python3-devel"
BUILD_TOOLS="gcc gcc-c++ make"
;;
amzn)
PKG_MGR="yum"
PKG_INSTALL="yum install -y"
UPDATE_CMD="yum update -y"
PYTHON_DEV="python3-devel"
BUILD_TOOLS="gcc gcc-c++ make"
;;
*)
echo -e "${RED}Unsupported distribution: $ID${NC}"
exit 1
;;
esac
echo -e "${GREEN}Detected: $PRETTY_NAME${NC}"
else
echo -e "${RED}Cannot detect operating system${NC}"
exit 1
fi
# Update package manager
echo -e "${YELLOW}[2/8] Updating package manager...${NC}"
$UPDATE_CMD
# Install dependencies
echo -e "${YELLOW}[3/8] Installing system dependencies...${NC}"
$PKG_INSTALL python3-pip $PYTHON_DEV $BUILD_TOOLS
# Install Python packages
echo -e "${YELLOW}[4/8] Installing Python packages...${NC}"
pip3 install --upgrade pip
pip3 install duckdb==$DUCKDB_VERSION pandas pyarrow
# Create directories and set ownership
echo -e "${YELLOW}[5/8] Creating directories and setting up permissions...${NC}"
mkdir -p $DUCKDB_HOME/temp
mkdir -p $CONFIG_DIR
mkdir -p /opt/duckdb
chown -R $DUCKDB_USER:$DUCKDB_USER $DUCKDB_HOME
chown -R $DUCKDB_USER:$DUCKDB_USER $CONFIG_DIR
chmod 755 $DUCKDB_HOME
chmod 755 $DUCKDB_HOME/temp
chmod 755 $CONFIG_DIR
# Configure memory limits
echo -e "${YELLOW}[6/8] Configuring system memory limits...${NC}"
# Append the limits only once so that re-running the installer does not keep
# adding duplicate entries to limits.conf. The marker comment written below
# doubles as the "already configured" check.
if grep -qF "# DuckDB memory limits" /etc/security/limits.conf 2>/dev/null; then
    echo -e "${GREEN}DuckDB limits already present in /etc/security/limits.conf${NC}"
else
    cat >> /etc/security/limits.conf << EOF
# DuckDB memory limits
$DUCKDB_USER soft as 8388608
$DUCKDB_USER hard as 16777216
$DUCKDB_USER soft memlock 4194304
$DUCKDB_USER hard memlock 8388608
EOF
fi
# Create DuckDB configuration file
echo -e "${YELLOW}[7/8] Creating DuckDB configuration...${NC}"
cat > $CONFIG_DIR/config.sql << EOF
-- Memory configuration
SET memory_limit = '$MEMORY_LIMIT';
SET temp_directory = '$DUCKDB_HOME/temp';
SET threads = $THREAD_COUNT;
-- Performance settings
SET enable_progress_bar = true;
SET checkpoint_threshold = '16MB';
SET wal_autocheckpoint = 1000;
-- Optimization settings
SET enable_optimizer = true;
SET enable_profiling = true;
EOF
chown $DUCKDB_USER:$DUCKDB_USER $CONFIG_DIR/config.sql
chmod 644 $CONFIG_DIR/config.sql
# Create partition setup script
# Write the table setup script. The quoted 'EOF' delimiter disables shell
# expansion, so the Python source below is written out verbatim.
cat > /opt/duckdb/partition_setup.py << 'EOF'
#!/usr/bin/env python3
import duckdb
import os
from datetime import datetime, timedelta


def create_partitioned_tables():
    """Create the sales_data and customer_data tables plus their indexes.

    Statements from ~/.config/duckdb/config.sql are applied first on a
    best-effort basis: a statement that DuckDB rejects is reported and
    skipped so that one bad setting does not block table creation.
    """
    db_path = '/var/lib/duckdb/analytics.db'
    conn = duckdb.connect(db_path)
    try:
        # Load configuration
        config_path = os.path.expanduser('~/.config/duckdb/config.sql')
        if os.path.exists(config_path):
            with open(config_path, 'r') as f:
                config = f.read()
            for line in config.split('\n'):
                statement = line.strip()
                if not statement or statement.startswith('--'):
                    continue
                try:
                    conn.execute(statement)
                except duckdb.Error as exc:
                    # Report instead of silently swallowing, so a typo in
                    # config.sql is visible to the operator.
                    print(f"Skipping config statement {statement!r}: {exc}")

        # Create sales table with indexing
        conn.execute("""
            CREATE TABLE IF NOT EXISTS sales_data (
                id INTEGER PRIMARY KEY,
                transaction_date DATE,
                customer_id INTEGER,
                product_id INTEGER,
                amount DECIMAL(10,2),
                region VARCHAR(50)
            );
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_sales_date ON sales_data(transaction_date);")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_sales_customer ON sales_data(customer_id);")

        # Create customer table with indexing
        conn.execute("""
            CREATE TABLE IF NOT EXISTS customer_data (
                customer_id INTEGER PRIMARY KEY,
                name VARCHAR(100),
                email VARCHAR(100),
                created_at TIMESTAMP,
                lifetime_value DECIMAL(12,2)
            );
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_customer_email ON customer_data(email);")
    finally:
        # Close the connection even when a statement fails.
        conn.close()

    print("Tables and indexes created successfully")


if __name__ == "__main__":
    create_partitioned_tables()
EOF
chmod 755 /opt/duckdb/partition_setup.py
chown $DUCKDB_USER:$DUCKDB_USER /opt/duckdb/partition_setup.py
# Create monitoring script
cat > /opt/duckdb/monitoring_setup.py << 'EOF'
#!/usr/bin/env python3
import duckdb
import time
import json
def monitor_performance():
conn = duckdb.connect('/var/lib/duckdb/analytics.db')
# Enable profiling
conn.execute("SET enable_profiling = true;")
# Sample query for monitoring
result = conn.execute("SELECT COUNT(*) as table_count FROM information_schema.tables WHERE table_schema = 'main';").fetchone()
stats = {
'timestamp': time.time(),
'tables': result[0] if result else 0,
'memory_usage': conn.execute("SELECT current_setting('memory_limit');").fetchone()[0]
}
print(json.dumps(stats, indent=2))
conn.close()
if __name__ == "__main__":
monitor_performance()
EOF
chmod 755 /opt/duckdb/monitoring_setup.py
chown $DUCKDB_USER:$DUCKDB_USER /opt/duckdb/monitoring_setup.py
# Run initial setup
echo -e "${YELLOW}[8/8] Running initial setup...${NC}"
sudo -u $DUCKDB_USER python3 /opt/duckdb/partition_setup.py
# Verification
echo -e "${YELLOW}Verifying installation...${NC}"
VERIFICATION_PASSED=true
# Check if DuckDB Python module is available
if ! sudo -u $DUCKDB_USER python3 -c "import duckdb; print(f'DuckDB version: {duckdb.__version__}')" 2>/dev/null; then
echo -e "${RED}✗ DuckDB Python module not properly installed${NC}"
VERIFICATION_PASSED=false
else
echo -e "${GREEN}✓ DuckDB Python module installed${NC}"
fi
# Check directories
if [ -d "$DUCKDB_HOME" ] && [ -d "$CONFIG_DIR" ]; then
echo -e "${GREEN}✓ Directories created${NC}"
else
echo -e "${RED}✗ Directories not properly created${NC}"
VERIFICATION_PASSED=false
fi
# Check configuration file
if [ -f "$CONFIG_DIR/config.sql" ]; then
echo -e "${GREEN}✓ Configuration file created${NC}"
else
echo -e "${RED}✗ Configuration file not created${NC}"
VERIFICATION_PASSED=false
fi
# Check database creation
if sudo -u $DUCKDB_USER python3 /opt/duckdb/monitoring_setup.py >/dev/null 2>&1; then
echo -e "${GREEN}✓ Database operational${NC}"
else
echo -e "${RED}✗ Database not operational${NC}"
VERIFICATION_PASSED=false
fi
if [ "$VERIFICATION_PASSED" = true ]; then
echo -e "${GREEN}DuckDB installation completed successfully!${NC}"
echo -e "${GREEN}Configuration:${NC}"
echo -e " Memory limit: $MEMORY_LIMIT"
echo -e " Thread count: $THREAD_COUNT"
echo -e " Data directory: $DUCKDB_HOME"
echo -e " Config directory: $CONFIG_DIR"
echo -e "${YELLOW}Usage:${NC}"
echo -e " Setup tables: python3 /opt/duckdb/partition_setup.py"
echo -e " Monitor: python3 /opt/duckdb/monitoring_setup.py"
else
echo -e "${RED}Installation completed with errors. Please check the output above.${NC}"
exit 1
fi
Review the script before running. Execute with: bash install.sh