#!/bin/bash
#
# elasticsearch-backup-index.sh
#
# Push logstash index from yesterday to s3 with an accompanying restore script.
# http://logstash.net
# http://www.elasticsearch.org
# https://github.com/s3tools/s3cmd | http://s3tools.org/s3cmd
#
# Inspiration:
# http://tech.superhappykittymeow.com/?p=296
#
# Must run on an elasticsearch node, and expects to find the index on this node.
usage() { cat << EOF
elasticsearch-backup-index.sh
Create a restorable backup of an elasticsearch index (assumes Logstash-format indices) and upload it to an existing S3 bucket. By default, yesterday's index is backed up. Note that this script itself does not restart elasticsearch; the restore script generated for each backup restarts elasticsearch after restoring an archived index.
OPTIONS:
  -h    Show this message
  -b    S3 path for backups (Required)
  -g    Consistent index name (default: logstash)
  -i    Elasticsearch index directory (Required)
  -d    Backup a specific date (format: YYYY.mm.dd)
  -c    Command for s3cmd (default: s3cmd put)
  -t    Temporary directory for archiving (default: /tmp)
  -p    Persist local backups, by default backups are not kept locally
  -s    Shards (default: 5)
  -r    Replicas (default: 0)
  -e    Elasticsearch URL (default: http://localhost:9200)
  -n    How nice tar must be (default: 19)
  -u    Restart command for elasticsearch (default: 'service elasticsearch restart')
EXAMPLES:

  ./elasticsearch-backup-index.sh -b "s3://backups/elasticsearch" \
    -i "/usr/local/elasticsearch/data/node/0/indices" -e "http://127.0.0.1:9200" \
    -g my_index -d "2013.05.21" -c "/usr/local/bin/s3cmd put" \
    -t "/mnt/es/backups" -p -u "service es restart"

    Connect to elasticsearch using 127.0.0.1 instead of localhost, back up the
    index "my_index" from 2013.05.21 instead of yesterday, use the s3cmd in
    /usr/local/bin explicitly, store the archive and restore script in
    /mnt/es/backups (and persist them), and use 'service es restart' to
    restart elasticsearch. (The -b and -i values above are placeholders.)
EOF
}
if [ "$USER" != 'root' ] && [ "$LOGNAME" != 'root' ]; then # I don't want to troubleshoot the permissions of others echo "This script must be run as root." exit 1 fi
while getopts ":b:i:d:c:g:t:p:s:r:e:n:u:h" flag do case "$flag" in h) usage exit 0 ;; b) S3_BASE=$OPTARG ;; i) INDEX_DIR=$OPTARG ;; d) DATE=$OPTARG ;; c) S3CMD=$OPTARG ;; g) INAME=$OPTARG ;; t) TMP_DIR=$OPTARG ;; p) PERSIST=1 ;; s) if [[ $OPTARG =~ $RE_D ]]; then SHARDS=$OPTARG else ERROR="${ERROR}Shards must be an integer.\n" fi ;; r) if [[ $OPTARG =~ $RE_D ]]; then REPLICAS=$OPTARG else ERROR="${ERROR}Replicas must be an integer.\n" fi ;; e) ELASTICSEARCH=$OPTARG ;; n) if [[ $OPTARG =~ $RE_D ]]; then NICE=$OPTARG fi # If nice is not an integer, just use default ;; u) RESTART=$OPTARG ;; ?) usage exit 1 ;; esac done
# We need an S3 base path
if [ -z "$S3_BASE" ]; then
  ERROR="${ERROR}Please provide an s3 bucket and path with -b.\n"
fi
# We need an elasticsearch index directory
if [ -z "$INDEX_DIR" ]; then
  ERROR="${ERROR}Please provide an Elasticsearch index directory with -i.\n"
fi
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  echo -e "$ERROR"
  usage
  exit 1
fi
if [ -z "$INAME" ]; then INAME="logstash" fi
# Default logstash index naming is hardcoded, as are YYYY-mm container directories.
if [ -n "$DATE" ]; then
  INDEX="$INAME-$DATE"
  YEARMONTH=${DATE//\./-}
  YEARMONTH=${YEARMONTH:0:7}
else
  INDEX=`date --date='yesterday' +"$INAME-%Y.%m.%d"`
  YEARMONTH=`date --date='yesterday' +"%Y-%m"`
fi
S3_TARGET="$S3_BASE/$YEARMONTH"
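# For example, with INAME=logstash and DATE=2013.05.21 (or yesterday's date
# when -d is omitted):
#   INDEX     -> logstash-2013.05.21
#   YEARMONTH -> 2013-05
#   S3_TARGET -> $S3_BASE/2013-05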
# Make sure there is an index
if ! [ -d "$INDEX_DIR/$INDEX" ]; then
  echo "The index $INDEX_DIR/$INDEX does not appear to exist."
  exit 1
fi
# Get metadata from elasticsearch
INDEX_MAPPING=`curl -s -XGET "$ELASTICSEARCH/$INDEX/_mapping"`
SETTINGS="{\"settings\":{\"number_of_shards\":$SHARDS,\"number_of_replicas\":$REPLICAS},\"mappings\":$INDEX_MAPPING}"
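# With the defaults (5 shards, 0 replicas), SETTINGS ends up roughly like:
#   {"settings":{"number_of_shards":5,"number_of_replicas":0},"mappings":{...}}
# This is the body the generated restore script PUTs when recreating the index.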
# Make the tmp directory if it does not already exist.
if ! [ -d "$TMP_DIR" ]; then
  mkdir -p "$TMP_DIR"
fi
# Tar and gzip the index directory.
cd "$INDEX_DIR"
nice -n $NICE tar czf "$TMP_DIR/$INDEX.tgz" "$INDEX"
cd - > /dev/null
# Create a restore script for elasticsearch (truncate rather than append, so
# a rerun with -p does not duplicate the script body)
cat << EOF > "$TMP_DIR/${INDEX}-restore.sh"
#!/bin/bash
#
# ${INDEX}-restore.sh - restores elasticsearch index: $INDEX to the
# elasticsearch instance at $ELASTICSEARCH. This script expects to run in the
# same directory as the $INDEX.tgz file.
# Make sure this index does not exist already
TEST=\`curl -XGET "$ELASTICSEARCH/$INDEX/_status" 2> /dev/null | grep error\`
if [ -z "\$TEST" ]; then
  echo "Index: $INDEX already exists on this elasticsearch node."
  exit 1
fi
# Extract index files
DOWNLOAD_DIR=\`pwd\`
cd $INDEX_DIR
if [ -f \$DOWNLOAD_DIR/$INDEX.tgz ]; then
  # If we have the archive, create the new index in ES
  curl -XPUT '$ELASTICSEARCH/$INDEX/' -d '$SETTINGS' > /dev/null 2>&1
  # Extract the archive into the INDEX_DIR
  tar xzf \$DOWNLOAD_DIR/$INDEX.tgz
  # Restart elasticsearch to allow it to open the new dir and file data
  $RESTART
  exit 0
else
  echo "Unable to locate archive file \$DOWNLOAD_DIR/$INDEX.tgz."
  exit 1
fi
EOF
# Put the archive and restore script in s3.
$S3CMD "$TMP_DIR/$INDEX.tgz" "$S3_TARGET/$INDEX.tgz"
$S3CMD "$TMP_DIR/$INDEX-restore.sh" "$S3_TARGET/$INDEX-restore.sh"
# Cleanup tmp files
if [ -z "$PERSIST" ]; then
  rm "$TMP_DIR/$INDEX.tgz"
  rm "$TMP_DIR/$INDEX-restore.sh"
fi
#!/bin/bash
# elasticsearch-close-old-indices.sh
#
# Close logstash format indices from elasticsearch maintaining only a
# specified number.
# http://logstash.net
# http://www.elasticsearch.org
#
# Inspiration:
# http://tech.superhappykittymeow.com/?p=296
#
# Must have access to the specified elasticsearch node.
usage() { cat << EOF
elasticsearch-close-old-indices.sh
Compares the current list of indices to a configured value and closes any indices surpassing that value. Sort is lexicographical; the first n of a 'sort -r' list are kept, all others are closed.
OPTIONS:
  -h    Show this message
  -i    Indices to keep open (default: 14)
  -e    Elasticsearch URL (default: http://localhost:9200)
  -g    Consistent index name (default: logstash)
  -o    Output actions to a specified file
EXAMPLES:
./elasticsearch-close-old-indices.sh
Connect to http://localhost:9200 and get a list of indices matching 'logstash'. Keep the top 14 indices in lexicographical order and close any others.
  ./elasticsearch-close-old-indices.sh -e "http://es.example.com:9200" \
    -i 28 -g my-logs -o /mnt/es/logfile.log

    Connect to http://es.example.com:9200 and get a list of indices matching
    'my-logs'. Keep the top 28 indices, close any others. When using a custom
    index naming scheme be sure that a 'sort -r' places the indices you want
    to keep at the top of the list. Output index closes to /mnt/es/logfile.log.

EOF
}
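# Defaults per the OPTIONS text above; RE_D (the integer check used by -i) is
# an assumed reconstruction, as this listing never defines it. Flags parsed
# below override these values.
RE_D="^[0-9]+$"
KEEP=14
ELASTICSEARCH="http://localhost:9200"
GREP="logstash"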
while getopts ":i:e:g:o:h" flag do case "$flag" in h) usage exit 0 ;; i) if [[ $OPTARG =~ $RE_D ]]; then KEEP=$OPTARG else ERROR="${ERROR}Indexes to keep must be an integer.\n" fi ;; e) ELASTICSEARCH=$OPTARG ;; g) GREP=$OPTARG ;; o) LOGFILE=$OPTARG ;; ?) usage exit 1 ;; esac done
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  echo -e "$ERROR"
  usage
  exit 1
fi
# Get the indices from elasticsearch (the three-argument match() below
# requires gawk)
INDICES_TEXT=`curl -s "$ELASTICSEARCH/_cat/indices?v" | awk '/'$GREP'/{match($0, /[[:blank:]]*('$GREP'.[^ ]+)[[:blank:]]*/, m); print m[1];}' | sort -r`
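# A sketch of what the pipeline above extracts (header and column layout as
# produced by `_cat/indices?v` on 1.x-era elasticsearch; details vary by
# version):
#
#   health status index               pri rep docs.count docs.deleted store.size pri.store.size
#   green  open   logstash-2014.05.02   5   0     123456            0     19.2mb         19.2mb
#   green  open   logstash-2014.05.01   5   0     123123            0     18.9mb         18.9mb
#
# INDICES_TEXT then holds "logstash-2014.05.02 logstash-2014.05.01 ..." with
# the newest index first, thanks to `sort -r`.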
if [ -z "$INDICES_TEXT" ]; then echo "No indices returned containing '$GREP' from $ELASTICSEARCH." exit 1 fi
# If we are logging, make sure we have a logfile. TODO - handle errors here
# (Fallback added: without -o, the unguarded redirects below would fail, so
# log to /dev/null instead.)
if [ -n "$LOGFILE" ]; then
  touch "$LOGFILE"
else
  LOGFILE=/dev/null
fi
# Close indices
declare -a INDEX=($INDICES_TEXT)
if [ ${#INDEX[@]} -gt $KEEP ]; then
  for index in ${INDEX[@]:$KEEP}; do
    # We don't want to accidentally close everything
    if [ -n "$index" ]; then
      echo -n `date "+[%Y-%m-%d %H:%M] "`" Closing index: $index." >> $LOGFILE
      curl -s -XPOST "$ELASTICSEARCH/$index/_flush" >> $LOGFILE
      curl -s -XPOST "$ELASTICSEARCH/$index/_close" >> $LOGFILE
      echo "." >> $LOGFILE
    fi
  done
fi
#!/usr/bin/env bash
#
# Delete logstash format indices from elasticsearch that are dated on or
# before a given expiration date.
#
# Inspiration:
# https://github.com/imperialwicket/elasticsearch-logstash-index-mgmt/blob/master/elasticsearch-remove-old-indices.sh
#
# Must have access to the specified elasticsearch node.
usage() { cat << EOF
elasticsearch-remove-old-indices.sh

Deletes any indices whose date is on or before a configured expiration date.

OPTIONS:
  -h    Show this message
  -d    Expiration date (YYYY-MM-dd) from when we should start deleting the
        indices (default: 3 months ago)
  -e    Elasticsearch URL (default: http://localhost:9200)
  -g    Consistent index name (default: logstash)
  -o    Output actions to a specified file
EXAMPLES:
./elasticsearch-remove-old-indices.sh
Connect to http://localhost:9200 and get a list of indices matching 'logstash'. Keep the indices from the last 3 months; delete any others.
  ./elasticsearch-remove-old-indices.sh -e "http://es.example.com:9200" \
    -d "1991-04-25" -g my-logs -o /mnt/es/logfile.log

    Connect to http://es.example.com:9200 and get a list of indices matching
    'my-logs'. Keep the indices created after 25 April 1991, delete any
    others. Output index deletes to /mnt/es/logfile.log.

EOF
}
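# Defaults per the OPTIONS text above; RE_DATE (the YYYY-MM-dd check used by
# -d) and the GNU-date expression for "3 months ago" are assumed
# reconstructions, as this listing never defines them. Flags parsed below
# override these values.
RE_DATE="^[0-9]{4}-[0-9]{2}-[0-9]{2}$"
DATE=`date -d "3 months ago" +%Y-%m-%d`
ELASTICSEARCH="http://localhost:9200"
INDEX_NAME="logstash"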
while getopts ":d:e:g:o:h" flag do case "$flag" in h) usage exit 0 ;; d) if [[ $OPTARG =~ $RE_DATE ]]; then DATE=$OPTARG else ERROR="${ERROR}Expiration date must be YYYY-MM-dd.\n" fi ;; e) ELASTICSEARCH=$OPTARG ;; g) INDEX_NAME=$OPTARG ;; o) LOGFILE=$OPTARG ;; ?) usage exit 1 ;; esac done
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  echo -e "$ERROR"
  usage
  exit 1
fi
# Get the indices from elasticsearch (the three-argument match() below
# requires gawk)
INDICES_TEXT=`curl -s "$ELASTICSEARCH/_cat/indices?v" | awk '/'$INDEX_NAME'/{match($0, /[[:blank:]]*('$INDEX_NAME'.[^ ]+)[[:blank:]]*/, m); print m[1];}' | sort -r`
if [ -z "$INDICES_TEXT" ]; then echo "No indices returned containing '$GREP' from $ELASTICSEARCH." exit 1 fi
# If we are logging, make sure we have a logfile. TODO - handle errors here
# (Fallback added: without -o, the unguarded redirects below would fail, so
# log to /dev/null instead.)
if [ -n "$LOGFILE" ] && ! [ -e "$LOGFILE" ]; then
  touch "$LOGFILE"
elif [ -z "$LOGFILE" ]; then
  LOGFILE=/dev/null
fi
# Delete indices
declare -a INDEX=($INDICES_TEXT)
for index in ${INDEX[@]}; do
  # We don't want to accidentally delete everything
  if [ -n "$index" ]; then
    INDEX_DATE=$(echo $index | sed -n 's/.*\([0-9]\{4\}\.[0-9]\{2\}\.[0-9]\{2\}\).*/\1/p' | sed 's/\./-/g')
    # Skip names that do not carry a parsable date
    if [ -n "$INDEX_DATE" ] && [ $(date -d "$DATE" +"%Y%m%d") -ge $(date -d "$INDEX_DATE" +"%Y%m%d") ]; then
      echo `date "+[%Y-%m-%d %H:%M] "`" Deleting index: $index." >> $LOGFILE
      curl -s -XDELETE "$ELASTICSEARCH/$index/" >> $LOGFILE
    fi
  fi
done

exit 0
#!/bin/bash
# elasticsearch-remove-old-indices.sh
#
# Delete logstash format indices from elasticsearch maintaining only a
# specified number.
# http://logstash.net
# http://www.elasticsearch.org
#
# Inspiration:
# http://tech.superhappykittymeow.com/?p=296
#
# Must have access to the specified elasticsearch node.
usage() { cat << EOF
elasticsearch-remove-old-indices.sh
Compares the current list of indices to a configured value and deletes any indices surpassing that value. Sort is lexicographical; the first n of a 'sort -r' list are kept, all others are deleted.
OPTIONS:
  -h    Show this message
  -i    Indices to keep (default: 14)
  -e    Elasticsearch URL (default: http://localhost:9200)
  -g    Consistent index name (default: logstash)
  -o    Output actions to a specified file
EXAMPLES:
./elasticsearch-remove-old-indices.sh
Connect to http://localhost:9200 and get a list of indices matching 'logstash'. Keep the top 14 indices in lexicographical order and delete any others.
  ./elasticsearch-remove-old-indices.sh -e "http://es.example.com:9200" \
    -i 28 -g my-logs -o /mnt/es/logfile.log

    Connect to http://es.example.com:9200 and get a list of indices matching
    'my-logs'. Keep the top 28 indices, delete any others. When using a custom
    index naming scheme be sure that a 'sort -r' places the indices you want
    to keep at the top of the list. Output index deletes to /mnt/es/logfile.log.

EOF
}
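# Defaults per the OPTIONS text above; RE_D (the integer check used by -i) is
# an assumed reconstruction, as this listing never defines it. Flags parsed
# below override these values.
RE_D="^[0-9]+$"
KEEP=14
ELASTICSEARCH="http://localhost:9200"
GREP="logstash"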
while getopts ":i:e:g:o:h" flag do case "$flag" in h) usage exit 0 ;; i) if [[ $OPTARG =~ $RE_D ]]; then KEEP=$OPTARG else ERROR="${ERROR}Indexes to keep must be an integer.\n" fi ;; e) ELASTICSEARCH=$OPTARG ;; g) GREP=$OPTARG ;; o) LOGFILE=$OPTARG ;; ?) usage exit 1 ;; esac done
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  echo -e "$ERROR"
  usage
  exit 1
fi
# Get the indices from elasticsearch (the three-argument match() below
# requires gawk)
INDICES_TEXT=`curl -s "$ELASTICSEARCH/_cat/indices?v" | awk '/'$GREP'/{match($0, /[[:blank:]]*('$GREP'.[^ ]+)[[:blank:]]*/, m); print m[1];}' | sort -r`
if [ -z "$INDICES_TEXT" ]; then echo "No indices returned containing '$GREP' from $ELASTICSEARCH." exit 1 fi
# If we are logging, make sure we have a logfile. TODO - handle errors here
# (Fallback added: without -o, the unguarded redirects below would fail, so
# log to /dev/null instead.)
if [ -n "$LOGFILE" ]; then
  touch "$LOGFILE"
else
  LOGFILE=/dev/null
fi
# Delete indices
declare -a INDEX=($INDICES_TEXT)
if [ ${#INDEX[@]} -gt $KEEP ]; then
  for index in ${INDEX[@]:$KEEP}; do
    # We don't want to accidentally delete everything
    if [ -n "$index" ]; then
      echo `date "+[%Y-%m-%d %H:%M] "`" Deleting index: $index." >> $LOGFILE
      curl -s -XDELETE "$ELASTICSEARCH/$index/" >> $LOGFILE
    fi
  done
fi
#!/bin/bash
#
# elasticsearch-restore-index.sh
#
# Retrieve a specified logstash index from s3 and restore with an accompanying
# restore script.
# http://logstash.net
# http://www.elasticsearch.org
# https://github.com/s3tools/s3cmd | http://s3tools.org/s3cmd
#
# Inspiration:
# http://tech.superhappykittymeow.com/?p=296
#
# Must run on an elasticsearch node with data, the restore script restarts
# elasticsearch.
usage() { cat << EOF
elasticsearch-restore-index.sh

Retrieve an index archive and its restore script from S3, then execute the restore script to restore the index on the local elasticsearch node.

OPTIONS:
  -h    Show this message
  -b    S3 path for backups (Required)
  -i    Elasticsearch index directory (Required)
  -d    Date to retrieve (Required, format: YYYY.mm.dd)
  -t    Temporary directory for download and extract (default: /tmp)
  -c    Command for s3cmd (default: s3cmd get)
  -e    Elasticsearch URL (default: http://localhost:9200)
  -n    How nice tar must be (default: 19)
EXAMPLES:

  ./elasticsearch-restore-index.sh -b "s3://backups/elasticsearch" \
    -i "/usr/local/elasticsearch/data/node/0/indices" -d "2013.05.01"

    Get the backup and restore script for the 2013.05.01 index from this s3
    bucket and restore the index to the provided elasticsearch index
    directory. (The -b and -i values above are placeholders.)
EOF
}
if [ "$USER" != 'root' ] && [ "$LOGNAME" != 'root' ]; then # I don't want to troubleshoot the permissions of others echo "This script must be run as root." exit 1 fi
while getopts ":b:i:t:d:c:e:n:h" flag do case "$flag" in h) usage exit 0 ;; b) S3_BASE=$OPTARG ;; i) INDEX_DIR=$OPTARG ;; t) TMP_DIR=$OPTARG ;; d) DATE=$OPTARG ;; c) S3CMD=$OPTARG ;; e) ELASTICSEARCH=$OPTARG ;; n) if [[ $OPTARG =~ $RE_D ]]; then NICE=$OPTARG fi # If nice is not an integer, just use default ;; ?) usage exit 1 ;; esac done
# We need an S3 base path
if [ -z "$S3_BASE" ]; then
  ERROR="${ERROR}Please provide an s3 bucket and path with -b.\n"
fi
# We need an elasticsearch index directory
if [ -z "$INDEX_DIR" ]; then
  ERROR="${ERROR}Please provide an Elasticsearch index directory with -i.\n"
fi
# We need a date to restore
if [ -z "$DATE" ]; then
  ERROR="${ERROR}Please provide a date for restoration with -d.\n"
fi
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  echo -e "$ERROR"
  usage
  exit 1
fi
# Default logstash index naming is hardcoded, as are YYYY-mm container directories.
INDEX="logstash-$DATE"
YEARMONTH=${DATE//\./-}
YEARMONTH=${YEARMONTH:0:7}
S3_TARGET="$S3_BASE/$YEARMONTH"
# Get archive and execute the restore script. TODO check file existence first
$S3CMD "$S3_TARGET/$INDEX.tgz" "$TMP_DIR/$INDEX.tgz"
$S3CMD "$S3_TARGET/$INDEX-restore.sh" "$TMP_DIR/$INDEX-restore.sh"
if [ -f "$TMP_DIR/$INDEX-restore.sh" ]; then
  chmod 750 "$TMP_DIR/$INDEX-restore.sh"
  # The restore script expects to run from the directory holding the archive.
  cd "$TMP_DIR"
  ./$INDEX-restore.sh

  # Cleanup tmp files
  rm "$TMP_DIR/$INDEX.tgz"
  rm "$TMP_DIR/$INDEX-restore.sh"
else
  echo "Unable to find restore script, does that backup exist?"
  exit 1
fi
#!/bin/bash
#
# This is a wrapper script for a daily run,
# i.e. you can run it via cron as follows:
#
## m h dom mon dow command
# 11 4 * * * /opt/es/es-backup-index.sh >> /var/log/elasticsearch/esindexbackup.log
# Assuming you have the scripts inside the '/opt/es/' folder. Or adjust the
# path to your taste.
#
# Set your system realities here
S3URL="s3://elasticsearch-backups"
ESDATA="/mnt/disk2/es/data/elasticsearch/nodes/0/indices/"
DAYS=7
# Read through all the available ES indices and generate a list of unique
# index names, then proceed on all the indices. Stripping the trailing date
# means e.g. 'logstash-2014.05.01' and 'logstash-2014.05.02' both reduce to
# the single name 'logstash'.
for i in `ls -1 $ESDATA | sed -r -e 's/-+[0-9]{4}\.[0-9]{2}\.[0-9]{2}$//' | uniq`; do
echo -n " *** Daily index backup for index name '$i' begin: " date /opt/es/elasticsearch-backup-index.sh -b $S3URL -i $ESDATA -g $i
echo -n " *** Close indices for index name '$i' which are > $DAYS days old : " date /opt/es/elasticsearch-close-old-indices.sh -i $DAYS -g $i
echo -n " *** Delete indices for index name '$i' which are > $DAYS days old : " date /opt/es/elasticsearch-remove-old-indices.sh -i $DAYS -g $i echo " ==== Done for index name '$i' ==== " echo " " done