allocate_data_node function improvement

- Remove "if" condition of allocate_data_node
- Delay the initial wait_to_join check by 5 seconds
- Set a 60-minute timeout for the wait_to_join function

Change-Id: Ie42af89551bd8804b87fe936c676e85130564187
This commit is contained in:
willxz 2020-07-02 15:16:37 -04:00
parent 88b79920db
commit e89c1c3c06

View File

@ -35,20 +35,24 @@ function stop () {
} }
function wait_to_join() { function wait_to_join() {
# delay 5 seconds before the first check
sleep 5
joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true ) joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true )
i=0
while [ -z "$joined" ]; do while [ -z "$joined" ]; do
sleep 5 sleep 5
joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true ) joined=$(curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" "${ELASTICSEARCH_ENDPOINT}/_cat/nodes" | grep -w $NODE_NAME || true )
i=$((i+1))
# Waiting for up to 60 minutes
if [ $i -gt 720 ]; then
break
fi
done done
} }
function allocate_data_node () { function allocate_data_node () {
if [ -f /data/restarting ]; then echo "Node ${NODE_NAME} has started. Waiting to rejoin the cluster."
rm /data/restarting
echo "Node ${NODE_NAME} has restarted. Waiting to rejoin the cluster."
wait_to_join wait_to_join
echo "Re-enabling Replica Shard Allocation" echo "Re-enabling Replica Shard Allocation"
curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \ curl -s -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -XPUT -H 'Content-Type: application/json' \
"${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{ "${ELASTICSEARCH_ENDPOINT}/_cluster/settings" -d "{
@ -56,7 +60,6 @@ function allocate_data_node () {
\"cluster.routing.allocation.enable\": null \"cluster.routing.allocation.enable\": null
} }
}" }"
fi
} }
function start_master_node () { function start_master_node () {
@ -116,12 +119,12 @@ function start_data_node () {
# although the request itself still returns a 200 OK status. If there are failures, reissue the request. # although the request itself still returns a 200 OK status. If there are failures, reissue the request.
# (The only side effect of not doing so is slower start up times. See flush documentation linked above) # (The only side effect of not doing so is slower start up times. See flush documentation linked above)
touch /data/restarting
echo "Node ${NODE_NAME} is ready to shutdown" echo "Node ${NODE_NAME} is ready to shutdown"
kill -TERM 1 kill -TERM 1
} }
trap drain_data_node TERM EXIT HUP INT trap drain_data_node TERM EXIT HUP INT
wait wait
} }
$COMMAND $COMMAND