Configure custom text analyzers, optimize field mappings, and implement index templates to dramatically improve Elasticsearch search performance and relevance for production workloads.
Prerequisites
- Elasticsearch 8.x installed and running
- Basic understanding of Elasticsearch concepts
- Administrative access to Elasticsearch cluster
What this solves
Elasticsearch performance depends heavily on how you configure text analysis and field mappings. Out-of-the-box settings work for basic use cases, but production search applications need custom analyzers to handle domain-specific text, optimized field mappings to reduce storage overhead, and proper index templates for consistent configuration across indices.
Step-by-step configuration
Install required packages
Install curl and jq for API interactions and JSON processing with Elasticsearch.
# Refresh the package index, then install the HTTP client and the JSON processor.
sudo apt update
sudo apt install --yes curl jq
Create custom text analyzers
Configure analyzers for different text processing needs. This example creates analyzers for product search, autocomplete, and email address extraction.
# Create the products_v1 index with three custom analyzers:
#   product_search       - lowercase, synonym expansion, English stemming, stop words
#   product_autocomplete - lowercase plus edge n-grams (2-20 characters)
#   email_analyzer       - lowercase tokens produced by email_tokenizer
# The pattern tokenizer splits on its matches by default (group -1), which would
# throw the addresses away; "group": 1 emits the captured address as the token.
curl -X PUT "localhost:9200/products_v1" -H "Content-Type: application/json" -d '
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "email_tokenizer": {
          "type": "pattern",
          "pattern": "([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)",
          "group": 1
        }
      },
      "filter": {
        "product_synonym": {
          "type": "synonym",
          "synonyms": [
            "laptop,notebook,computer",
            "mobile,phone,smartphone",
            "tv,television,monitor"
          ]
        },
        "product_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 20
        }
      },
      "analyzer": {
        "product_search": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "product_synonym",
            "product_stemmer",
            "stop"
          ]
        },
        "product_autocomplete": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "edge_ngram_filter"
          ]
        },
        "email_analyzer": {
          "type": "custom",
          "tokenizer": "email_tokenizer",
          "filter": [
            "lowercase"
          ]
        }
      }
    }
  }
}'
Configure optimized field mappings
Define field mappings that balance search performance with storage efficiency. Use appropriate field types and disable unnecessary features.
# Add explicit field mappings to products_v1.
# title is analyzed with product_search and carries two sub-fields
# (autocomplete, exact); metadata is stored but not parsed (enabled: false) and
# internal_id is not indexed (index: false).
curl --request PUT "localhost:9200/products_v1/_mapping" \
  --header "Content-Type: application/json" \
  --data '
{
"properties": {
"title": {
"type": "text",
"analyzer": "product_search",
"fields": {
"autocomplete": {
"type": "text",
"analyzer": "product_autocomplete",
"search_analyzer": "standard"
},
"exact": {
"type": "keyword",
"ignore_above": 256
}
}
},
"description": {
"type": "text",
"analyzer": "product_search",
"index_options": "freqs",
"norms": false
},
"category": {
"type": "keyword",
"ignore_above": 64
},
"tags": {
"type": "keyword",
"ignore_above": 32
},
"price": {
"type": "scaled_float",
"scaling_factor": 100
},
"created_at": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"in_stock": {
"type": "boolean"
},
"metadata": {
"type": "object",
"enabled": false
},
"internal_id": {
"type": "keyword",
"index": false
}
}
}'
Create index templates for consistency
Index templates ensure all new indices follow the same configuration patterns automatically.
# Register a composable index template for every index matching products_*.
# It carries the same analysis chain and field set as products_v1:
#   - email_tokenizer uses "group": 1 so it emits the matched address instead of
#     splitting on it, and email_analyzer is defined so the tokenizer is usable.
#   - mappings are "dynamic": "strict", so every field the application writes
#     (including tags, in_stock, metadata, internal_id) must be declared here or
#     indexing a document that contains it is rejected.
curl -X PUT "localhost:9200/_index_template/products_template" -H "Content-Type: application/json" -d '
{
  "index_patterns": ["products_*"],
  "priority": 100,
  "template": {
    "settings": {
      "number_of_shards": 2,
      "number_of_replicas": 1,
      "refresh_interval": "5s",
      "max_result_window": 50000,
      "analysis": {
        "tokenizer": {
          "email_tokenizer": {
            "type": "pattern",
            "pattern": "([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)",
            "group": 1
          }
        },
        "filter": {
          "product_synonym": {
            "type": "synonym",
            "synonyms": [
              "laptop,notebook,computer",
              "mobile,phone,smartphone",
              "tv,television,monitor"
            ]
          },
          "product_stemmer": {
            "type": "stemmer",
            "language": "english"
          },
          "edge_ngram_filter": {
            "type": "edge_ngram",
            "min_gram": 2,
            "max_gram": 20
          }
        },
        "analyzer": {
          "product_search": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "product_synonym",
              "product_stemmer",
              "stop"
            ]
          },
          "product_autocomplete": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "edge_ngram_filter"
            ]
          },
          "email_analyzer": {
            "type": "custom",
            "tokenizer": "email_tokenizer",
            "filter": [
              "lowercase"
            ]
          }
        }
      }
    },
    "mappings": {
      "dynamic": "strict",
      "properties": {
        "title": {
          "type": "text",
          "analyzer": "product_search",
          "fields": {
            "autocomplete": {
              "type": "text",
              "analyzer": "product_autocomplete",
              "search_analyzer": "standard"
            },
            "exact": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "description": {
          "type": "text",
          "analyzer": "product_search",
          "index_options": "freqs",
          "norms": false
        },
        "category": {
          "type": "keyword",
          "ignore_above": 64
        },
        "tags": {
          "type": "keyword",
          "ignore_above": 32
        },
        "price": {
          "type": "scaled_float",
          "scaling_factor": 100
        },
        "created_at": {
          "type": "date",
          "format": "strict_date_optional_time||epoch_millis"
        },
        "in_stock": {
          "type": "boolean"
        },
        "metadata": {
          "type": "object",
          "enabled": false
        },
        "internal_id": {
          "type": "keyword",
          "index": false
        }
      }
    }
  }
}'
Optimize search queries
Use specific field targets and appropriate query types to maximize performance with your custom analyzers.
# Relevance-ranked product search restricted to in-stock items priced 500-2000.
# The should clauses score title, title.autocomplete and description matches.
# Because the bool query also has a filter clause, minimum_should_match would
# default to 0 and every filtered document would match whether or not it
# contains the search text; setting it to 1 requires at least one text match.
curl -X GET "localhost:9200/products_v1/_search" -H "Content-Type: application/json" -d '
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": {
              "query": "laptop computer",
              "boost": 3.0
            }
          }
        },
        {
          "match": {
            "title.autocomplete": {
              "query": "laptop",
              "boost": 2.0
            }
          }
        },
        {
          "match": {
            "description": {
              "query": "laptop computer",
              "boost": 1.0
            }
          }
        }
      ],
      "minimum_should_match": 1,
      "filter": [
        {
          "term": {
            "in_stock": true
          }
        },
        {
          "range": {
            "price": {
              "gte": 500,
              "lte": 2000
            }
          }
        }
      ]
    }
  },
  "sort": [
    "_score",
    {
      "created_at": {
        "order": "desc"
      }
    }
  ],
  "size": 20
}'
Configure index lifecycle management
Set up ILM policies to automatically manage index performance and storage costs over time.
# Create the products_policy ILM policy with four phases:
#   hot    - rollover at 5gb / 30d / 10,000,000 docs, priority 100
#   warm   - from 30d: priority 50, replicas set to 0, force-merge to 1 segment
#   cold   - from 90d: priority 10
#   delete - from 365d: delete the index
curl --request PUT "localhost:9200/_ilm/policy/products_policy" \
  --header "Content-Type: application/json" \
  --data '
{
"policy": {
"phases": {
"hot": {
"actions": {
"rollover": {
"max_size": "5gb",
"max_age": "30d",
"max_docs": 10000000
},
"set_priority": {
"priority": 100
}
}
},
"warm": {
"min_age": "30d",
"actions": {
"set_priority": {
"priority": 50
},
"allocate": {
"number_of_replicas": 0
},
"forcemerge": {
"max_num_segments": 1
}
}
},
"cold": {
"min_age": "90d",
"actions": {
"set_priority": {
"priority": 10
}
}
},
"delete": {
"min_age": "365d",
"actions": {
"delete": {}
}
}
}
}
}'
Set up monitoring and alerting
Configure monitoring to track search performance metrics and identify optimization opportunities.
# Check cluster health and performance
curl -X GET "localhost:9200/_cluster/health?pretty"
curl -X GET "localhost:9200/_nodes/stats/indices?pretty"

# Monitor search performance
curl -X GET "localhost:9200/products_v1/_stats/search?pretty"

# Configure slow query log thresholds.
# index.search.slowlog.level no longer exists in Elasticsearch 8.x and makes the
# whole request fail; the former "info" level is expressed by disabling the
# debug threshold (-1) and keeping the warn/info thresholds.
curl -X PUT "localhost:9200/products_v1/_settings" -H "Content-Type: application/json" -d '
{
  "index.search.slowlog.threshold.query.warn": "10s",
  "index.search.slowlog.threshold.query.info": "5s",
  "index.search.slowlog.threshold.query.debug": "-1",
  "index.search.slowlog.threshold.fetch.warn": "1s",
  "index.search.slowlog.threshold.fetch.info": "800ms"
}'
Verify your setup
Test your custom analyzers and verify the optimized field mappings are working correctly.
# Test the custom analyzer
curl -X GET "localhost:9200/products_v1/_analyze" -H "Content-Type: application/json" -d '
{
  "analyzer": "product_search",
  "text": "Gaming laptop notebook computer"
}'

# Check mapping configuration
curl -X GET "localhost:9200/products_v1/_mapping?pretty"

# Verify index template
curl -X GET "localhost:9200/_index_template/products_template?pretty"

# Test autocomplete functionality
curl -X GET "localhost:9200/products_v1/_analyze" -H "Content-Type: application/json" -d '
{
  "analyzer": "product_autocomplete",
  "text": "laptop"
}'

# Check index stats and performance
curl -X GET "localhost:9200/products_v1/_stats?pretty&human"
Advanced optimization techniques
Configure search-time boosting
Implement dynamic scoring based on business metrics like popularity, recency, or conversion rates.
# Search title (boosted x3) and description for "laptop", then rescale the score:
#   - documents created within the last 30 days get weight 1.5
#   - every document is scaled by log1p(1.2 * popularity_score), using 1 when
#     the field is missing
# Both score_mode and boost_mode multiply the factors together.
# NOTE(review): popularity_score is not declared in the products_v1 mapping
# shown in this guide - presumably added separately; confirm before relying on it.
curl --request GET "localhost:9200/products_v1/_search" \
  --header "Content-Type: application/json" \
  --data '
{
"query": {
"function_score": {
"query": {
"multi_match": {
"query": "laptop",
"fields": ["title^3", "description"]
}
},
"functions": [
{
"filter": {
"range": {
"created_at": {
"gte": "now-30d"
}
}
},
"weight": 1.5
},
{
"field_value_factor": {
"field": "popularity_score",
"factor": 1.2,
"modifier": "log1p",
"missing": 1
}
}
],
"score_mode": "multiply",
"boost_mode": "multiply"
}
}
}'
Implement search result highlighting
Add highlighting to improve search relevance visibility and user experience.
# Search title and description and return highlighted fragments.
# pre_tags/post_tags must be non-empty markup; with empty strings the matched
# terms come back unmarked and the highlight section is useless to a client.
curl -X GET "localhost:9200/products_v1/_search" -H "Content-Type: application/json" -d '
{
  "query": {
    "multi_match": {
      "query": "wireless headphones",
      "fields": ["title", "description"]
    }
  },
  "highlight": {
    "fields": {
      "title": {
        "pre_tags": ["<mark>"],
        "post_tags": ["</mark>"],
        "fragment_size": 100,
        "number_of_fragments": 3
      },
      "description": {
        "pre_tags": ["<mark>"],
        "post_tags": ["</mark>"],
        "fragment_size": 150,
        "number_of_fragments": 2
      }
    }
  }
}'
Performance monitoring and tuning
Create a monitoring script to track key performance indicators:
#!/bin/bash
# Elasticsearch Performance Monitor
# Appends cluster health, per-node search counters and JVM heap usage to
# LOG_FILE, and logs a warning when the cluster status is not "green".

ES_HOST="localhost:9200"
LOG_FILE="/var/log/elasticsearch_performance.log"
DATE=$(date '+%Y-%m-%d %H:%M:%S')

echo "[$DATE] Starting Elasticsearch performance check" >> "$LOG_FILE"

# Check cluster health
HEALTH=$(curl -s "$ES_HOST/_cluster/health" | jq -r '.status')
echo "[$DATE] Cluster health: $HEALTH" >> "$LOG_FILE"

# Check search performance
SEARCH_STATS=$(curl -s "$ES_HOST/_nodes/stats/indices/search" | jq '.nodes[] | {query_total: .indices.search.query_total, query_time_in_millis: .indices.search.query_time_in_millis}')
echo "[$DATE] Search stats: $SEARCH_STATS" >> "$LOG_FILE"

# Check memory usage
MEMORY_STATS=$(curl -s "$ES_HOST/_nodes/stats/jvm" | jq '.nodes[] | {heap_used_percent: .jvm.mem.heap_used_percent}')
echo "[$DATE] Memory usage: $MEMORY_STATS" >> "$LOG_FILE"

# Alert if performance degrades. An unreachable cluster leaves HEALTH empty,
# which also takes this branch.
if [[ "$HEALTH" != "green" ]]; then
  echo "[$DATE] WARNING: Cluster health is $HEALTH" >> "$LOG_FILE"
  # Add alerting logic here
fi
Make the script executable and add to cron:
sudo chmod +x /opt/scripts/elasticsearch_performance.sh
# Run the monitor every five minutes from root's crontab.
# `crontab -` replaces the whole crontab, so the existing entries are re-emitted
# first; any previous line for this script is dropped so re-running this step
# does not create duplicates.
{
  sudo crontab -l 2>/dev/null | grep -vF '/opt/scripts/elasticsearch_performance.sh'
  echo "*/5 * * * * /opt/scripts/elasticsearch_performance.sh"
} | sudo crontab -
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Slow search response times | Inefficient analyzers or large result sets | Optimize analyzers, use filters instead of queries, implement pagination |
| High memory usage | Too many field mappings or large synonyms | Use ignore_above limits, optimize synonym lists, disable unnecessary features |
| Synonym filter not working | Synonyms applied at wrong stage | Place synonym filter before stemmer in analyzer chain |
| Autocomplete returning irrelevant results | Edge n-gram settings too broad | Adjust min_gram and max_gram values, use separate search analyzer |
| Index template not applied | Wrong pattern or priority conflicts | Check pattern matching with GET _index_template and adjust priority |
| Custom analyzer errors | Invalid filter order or configuration | Test analyzer with _analyze API, check filter dependencies |
Next steps
- Set up cross-cluster replication for disaster recovery
- Configure automated data lifecycle management
- Automate backup strategies with snapshot management
- Implement ML-based search optimization and anomaly detection
- Configure advanced security with authentication and RBAC
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Strict mode. -E (errtrace) is required because every step of this script runs
# inside a function: without it an ERR trap is not inherited by functions and a
# failing command would terminate the script without the trap handler running.
set -Eeuo pipefail

# Colors for output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Global variables
# Positional arguments: $1 = Elasticsearch host, $2 = Elasticsearch port.
ELASTICSEARCH_HOST="${1:-localhost}"
ELASTICSEARCH_PORT="${2:-9200}"
# Filled in by check_prerequisites once the distribution is known.
PKG_MGR=""
PKG_INSTALL=""
# Usage message
#######################################
# Print the command-line synopsis and terminate.
# Outputs: three usage lines on stdout
# Returns: never returns; exits with status 1
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 [elasticsearch_host] [elasticsearch_port]" \
    " elasticsearch_host: Elasticsearch host (default: localhost)" \
    " elasticsearch_port: Elasticsearch port (default: 9200)"
  exit 1
}
# Logging functions
# Print an informational message on stdout with a green [INFO] tag.
log_info() {
  local message=$1
  echo -e "${GREEN}[INFO]${NC} ${message}"
}

# Print a warning on stdout with a yellow [WARN] tag.
log_warn() {
  local message=$1
  echo -e "${YELLOW}[WARN]${NC} ${message}"
}

# Print an error on stderr with a red [ERROR] tag.
log_error() {
  local message=$1
  echo -e "${RED}[ERROR]${NC} ${message}" >&2
}
# Cleanup on failure
# Failure handler: reports that the script failed. Registered below as the
# handler for the ERR trap.
cleanup() {
  local -r notice="Script failed. Check the logs above for details."
  log_error "$notice"
}
trap 'cleanup' ERR
# Check prerequisites
#######################################
# Validate the execution environment and select the package manager.
# Globals:   EUID (read), ID (set by sourcing /etc/os-release),
#            PKG_MGR, PKG_INSTALL (written)
# Outputs:   progress line on stdout, results via log_info / log_error
# Returns:   exits with status 1 when run as root, when sudo is missing, or
#            when the distribution cannot be detected or is unsupported
#######################################
check_prerequisites() {
  echo "[1/6] Checking prerequisites..."

  if (( EUID == 0 )); then
    log_error "This script should not be run as root for security reasons"
    exit 1
  fi

  command -v sudo >/dev/null 2>&1 || {
    log_error "sudo is required but not installed"
    exit 1
  }

  # Detect distribution
  if [[ ! -f /etc/os-release ]]; then
    log_error "Cannot detect distribution"
    exit 1
  fi

  # Sourcing the file defines ID (and other os-release keys) in this shell.
  . /etc/os-release
  case "$ID" in
    ubuntu | debian)
      PKG_MGR="apt"
      ;;
    almalinux | rocky | centos | rhel | ol | fedora)
      PKG_MGR="dnf"
      ;;
    amzn)
      PKG_MGR="yum"
      ;;
    *)
      log_error "Unsupported distribution: $ID"
      exit 1
      ;;
  esac
  # Every supported manager uses the same "install -y" sub-command.
  PKG_INSTALL="${PKG_MGR} install -y"

  log_info "Detected distribution: $ID"
  log_info "Using package manager: $PKG_MGR"
}
# Install required packages
#######################################
# Install curl and jq with the package manager chosen earlier.
# Globals:   PKG_MGR, PKG_INSTALL (read)
# Outputs:   progress line on stdout, success message via log_info
#######################################
install_packages() {
  echo "[2/6] Installing required packages..."

  # Only apt gets an explicit index refresh before installing.
  if [[ "$PKG_MGR" == "apt" ]]; then
    sudo apt update
  fi

  if [[ "$PKG_MGR" == "apt" || "$PKG_MGR" == "dnf" || "$PKG_MGR" == "yum" ]]; then
    # PKG_INSTALL holds a command plus its flags, so it is deliberately left
    # unquoted to be word-split into separate arguments.
    # shellcheck disable=SC2086
    sudo $PKG_INSTALL curl jq
  fi

  log_info "Required packages installed successfully"
}
# Check Elasticsearch connectivity
#######################################
# Confirm that the cluster health endpoint answers over HTTP.
# Globals:   ELASTICSEARCH_HOST, ELASTICSEARCH_PORT (read)
# Outputs:   progress line on stdout, result via log_info / log_error
# Returns:   exits with status 1 when the request fails
#######################################
check_elasticsearch() {
  echo "[3/6] Checking Elasticsearch connectivity..."

  local endpoint="${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}"

  # -f makes curl return non-zero on HTTP errors as well as connection failures.
  if curl -sf "http://${endpoint}/_cluster/health" >/dev/null; then
    log_info "Elasticsearch is accessible at ${endpoint}"
    return
  fi

  log_error "Cannot connect to Elasticsearch at ${endpoint}"
  log_error "Please ensure Elasticsearch is running and accessible"
  exit 1
}
# Create custom analyzers and index
#######################################
# Create the products_v1 index with the custom analysis chain.
# Skips creation when the index already exists, because a PUT on an existing
# index is rejected and would make re-runs of this script fail.
# Globals:   ELASTICSEARCH_HOST, ELASTICSEARCH_PORT (read)
# Outputs:   progress line on stdout, result via log_info / log_warn / log_error
# Returns:   0 on success or when the index already exists; exits with
#            status 1 when the create request does not return HTTP 200/201
#######################################
create_analyzers() {
  echo "[4/6] Creating custom analyzers and index..."

  local index_url="http://${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}/products_v1"
  local response_code

  # HEAD on an index URL answers 200 when the index exists and 404 otherwise.
  response_code=$(curl -s -o /dev/null -w "%{http_code}" --head "$index_url")
  if [[ "$response_code" == "200" ]]; then
    log_warn "Index products_v1 already exists; leaving its analyzers unchanged"
    return 0
  fi

  # "group": 1 makes the pattern tokenizer emit the captured e-mail address;
  # with the default (-1) it splits on the match and discards the address.
  response_code=$(curl -s -o /dev/null -w "%{http_code}" -X PUT \
    "$index_url" \
    -H "Content-Type: application/json" \
    -d '{
  "settings": {
    "analysis": {
      "tokenizer": {
        "email_tokenizer": {
          "type": "pattern",
          "pattern": "([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)",
          "group": 1
        }
      },
      "filter": {
        "product_synonym": {
          "type": "synonym",
          "synonyms": [
            "laptop,notebook,computer",
            "mobile,phone,smartphone",
            "tv,television,monitor"
          ]
        },
        "product_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 20
        }
      },
      "analyzer": {
        "product_search": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "product_synonym", "product_stemmer", "stop"]
        },
        "product_autocomplete": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "edge_ngram_filter"]
        },
        "email_analyzer": {
          "type": "custom",
          "tokenizer": "email_tokenizer",
          "filter": ["lowercase"]
        }
      }
    }
  }
}')

  if [[ "$response_code" != "200" && "$response_code" != "201" ]]; then
    log_error "Failed to create index with custom analyzers (HTTP $response_code)"
    exit 1
  fi

  log_info "Custom analyzers created successfully"
}
# Configure field mappings
#######################################
# Apply the explicit field mappings to the products_v1 index.
# Globals:   ELASTICSEARCH_HOST, ELASTICSEARCH_PORT (read)
# Outputs:   progress line on stdout, result via log_info / log_error
# Returns:   exits with status 1 unless the request returns HTTP 200
#######################################
configure_mappings() {
  echo "[5/6] Configuring optimized field mappings..."

  local mapping_url="http://${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}/products_v1/_mapping"
  local http_status

  # The response body is discarded; only the HTTP status code is captured.
  http_status=$(curl -s -o /dev/null -w "%{http_code}" -X PUT \
    "$mapping_url" \
    -H "Content-Type: application/json" \
    -d '{
"properties": {
"title": {
"type": "text",
"analyzer": "product_search",
"fields": {
"autocomplete": {
"type": "text",
"analyzer": "product_autocomplete",
"search_analyzer": "standard"
},
"exact": {
"type": "keyword",
"ignore_above": 256
}
}
},
"description": {
"type": "text",
"analyzer": "product_search",
"index_options": "freqs",
"norms": false
},
"category": {
"type": "keyword",
"ignore_above": 64
},
"tags": {
"type": "keyword",
"ignore_above": 32
},
"price": {
"type": "scaled_float",
"scaling_factor": 100
},
"created_at": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"in_stock": {
"type": "boolean"
},
"metadata": {
"type": "object",
"enabled": false
},
"internal_id": {
"type": "keyword",
"index": false
}
}
}')

  [[ "$http_status" == "200" ]] || {
    log_error "Failed to configure field mappings (HTTP $http_status)"
    exit 1
  }

  log_info "Field mappings configured successfully"
}
# Create index template
#######################################
# Register the products_template composable index template.
# The template carries the shard settings plus the same analysis chain and
# field mappings that this script applies to products_v1, so indices created
# later under products_* get an identical configuration.
# Globals:   ELASTICSEARCH_HOST, ELASTICSEARCH_PORT (read)
# Outputs:   progress line on stdout, result via log_info / log_error
# Returns:   exits with status 1 unless the request returns HTTP 200
#######################################
create_index_template() {
  echo "[6/6] Creating index template for consistency..."

  local response_code

  # Mappings are "dynamic": "strict": a document containing a field that is not
  # declared below is rejected, so the list mirrors the products_v1 mapping.
  response_code=$(curl -s -o /dev/null -w "%{http_code}" -X PUT \
    "http://${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}/_index_template/products_template" \
    -H "Content-Type: application/json" \
    -d '{
  "index_patterns": ["products_*"],
  "priority": 100,
  "template": {
    "settings": {
      "number_of_shards": 2,
      "number_of_replicas": 1,
      "refresh_interval": "5s",
      "max_result_window": 50000,
      "analysis": {
        "tokenizer": {
          "email_tokenizer": {
            "type": "pattern",
            "pattern": "([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)",
            "group": 1
          }
        },
        "filter": {
          "product_synonym": {
            "type": "synonym",
            "synonyms": [
              "laptop,notebook,computer",
              "mobile,phone,smartphone",
              "tv,television,monitor"
            ]
          },
          "product_stemmer": {
            "type": "stemmer",
            "language": "english"
          },
          "edge_ngram_filter": {
            "type": "edge_ngram",
            "min_gram": 2,
            "max_gram": 20
          }
        },
        "analyzer": {
          "product_search": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": ["lowercase", "product_synonym", "product_stemmer", "stop"]
          },
          "product_autocomplete": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": ["lowercase", "edge_ngram_filter"]
          },
          "email_analyzer": {
            "type": "custom",
            "tokenizer": "email_tokenizer",
            "filter": ["lowercase"]
          }
        }
      }
    },
    "mappings": {
      "dynamic": "strict",
      "properties": {
        "title": {
          "type": "text",
          "analyzer": "product_search",
          "fields": {
            "autocomplete": {
              "type": "text",
              "analyzer": "product_autocomplete",
              "search_analyzer": "standard"
            },
            "exact": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "description": {
          "type": "text",
          "analyzer": "product_search",
          "index_options": "freqs",
          "norms": false
        },
        "category": {
          "type": "keyword",
          "ignore_above": 64
        },
        "tags": {
          "type": "keyword",
          "ignore_above": 32
        },
        "price": {
          "type": "scaled_float",
          "scaling_factor": 100
        },
        "created_at": {
          "type": "date",
          "format": "strict_date_optional_time||epoch_millis"
        },
        "in_stock": {
          "type": "boolean"
        },
        "metadata": {
          "type": "object",
          "enabled": false
        },
        "internal_id": {
          "type": "keyword",
          "index": false
        }
      }
    }
  }
}')

  if [[ "$response_code" != "200" ]]; then
    log_error "Failed to create index template (HTTP $response_code)"
    exit 1
  fi

  log_info "Index template created successfully"
}
# Verification
#######################################
# Confirm that the index settings, the mappings and the index template exist,
# then print a summary.
# Globals:   ELASTICSEARCH_HOST, ELASTICSEARCH_PORT (read)
# Outputs:   check results and summary via log_info / log_error
# Returns:   exits with status 1 on the first failed check
#######################################
verify_setup() {
  local base_url="http://${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}"

  echo
  log_info "Verifying Elasticsearch optimization setup..."

  # Check index exists and has custom analyzers
  # (jq -e returns non-zero when the selected path is null or false)
  if ! curl -sf "${base_url}/products_v1/_settings" \
    | jq -e '.products_v1.settings.index.analysis.analyzer' >/dev/null; then
    log_error "✗ Custom analyzers verification failed"
    exit 1
  fi
  log_info "✓ Custom analyzers are configured"

  # Check mappings are configured
  if ! curl -sf "${base_url}/products_v1/_mapping" \
    | jq -e '.products_v1.mappings.properties.title' >/dev/null; then
    log_error "✗ Field mappings verification failed"
    exit 1
  fi
  log_info "✓ Field mappings are configured"

  # Check index template exists
  if ! curl -sf "${base_url}/_index_template/products_template" >/dev/null; then
    log_error "✗ Index template verification failed"
    exit 1
  fi
  log_info "✓ Index template is configured"

  echo
  log_info "Elasticsearch optimization completed successfully!"
  log_info "Index: products_v1"
  log_info "Template: products_template (applies to products_* indices)"
  log_info "Analyzers: product_search, product_autocomplete, email_analyzer"
}
# Main execution
#######################################
# Entry point: validate the argument count, then run every setup step in order.
# Arguments: at most two positional arguments (host, port); more prints usage
#######################################
main() {
  (( $# <= 2 )) || usage

  local step
  for step in \
    check_prerequisites \
    install_packages \
    check_elasticsearch \
    create_analyzers \
    configure_mappings \
    create_index_template \
    verify_setup; do
    "$step"
  done
}

main "$@"
Review the script before running. Execute with: bash install.sh