c0617ebb53
All rmon resource monitoring has been moved to collectd. This update removes rmon from mtce and the load. Story: 2002823 Task: 30045 Test Plan: PASS: Build and install a standard system. PASS: Inspect mtce rpm list PASS: Inspect logs PASS: Check pmon.d Depends-On: https://review.openstack.org/#/c/643739 Change-Id: I927862895272fdd024d281ab49e0a128465b1b3f Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
477 lines
15 KiB
Bash
Executable File
477 lines
15 KiB
Bash
Executable File
#!/bin/bash
|
|
#
# Copyright (c) 2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# This script supports no-reboot patching of any single or
# combination of maintenance processes specified on the command line.
#
# Calling sequence:
#
#    mtce-restart process1 process2 process3 ...
#    if [ $? != 0 ] ; then
#        restart action failed
#    fi
#
###############################################################################
#
# The patching subsystem provides a patch-functions bash source file
# with useful function and variable definitions.
#
|
|
# Load the patching helper library, but only when it is present on this
# host; no alternative action is taken when the file is missing.
if [[ -e /etc/patching/patch-functions ]] ; then
    source /etc/patching/patch-functions
fi
|
|
|
|
loginfo "----------------------------------------------"
loginfo "Maintenance No-Reboot Patching Restart Request"

# Overall script return code (integer typed). It starts out as the patching
# failure status and is replaced by the OK status only once all requested
# restarts are accounted for.
declare -i GLOBAL_RC="${PATCH_STATUS_FAILED}"

#if [ ! -e $PATCH_FLAGDIR ] ; then
#    mkdir -p $PATCH_FLAGDIR
#fi

# Set to true by the -c / --clean option, in which case the flag file of
# each requested process is removed before it is looked at.
CLEAN=false

# Completion status values ; stored in the PID field of a process struct
DISABLED=disabled
NOPID=not-running
SKIPPED=skipped
RESTARTED=restarted

# Executables used to query and restart processes
SM_RESTART_EXEC=sm-restart-safe
SM_QUERY_EXEC=sm-query
PMON_RESTART_EXEC=pmon-restart

# Field positions inside a colon separated process struct
PROCESS_INDEX=0
PID_INDEX=1
ALIAS_INDEX=2
|
|
|
|
|
|
#
# Process Struct and List   [ name ] [ pid | status ] [ alias ]
#
# Fields are colon separated, e.g. "mtcAgent:0:mtc-agent". The alias field
# is only present for SM managed processes, e.g. "pmond:0".
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE TO PATCH WRITERS: Simply Un-Comment processes you want no-reboot patch restarted.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
|
|
|
|
# Restart control lists, one per managing service. Both start out as an
# empty string; the argument parser rebuilds them as arrays of process
# struct entries.
pmon_managed_processes=""
sm_managed_processes=""
|
|
|
|
# Build the process lists from the command line.
# All arguments should be a valid maintenance process name.
# The name of the binary, not the SM alias.
# See the case arms below for the supported process names.

# Append one command line argument to the matching restart list.
#   $1 - a supported process name, or -c / --clean
# SM managed processes are stored as "name:0:alias" and PMON managed ones as
# "name:0"; the 0 is the pid placeholder. -c / --clean sets CLEAN=true.
# An unsupported name is logged and otherwise ignored.
mtce_restart_add_process()
{
    case "${1}" in

        -c|--clean)
            CLEAN=true
            ;;

        # NOTE: the existing list is expanded unquoted on purpose. The lists
        # start out as an empty string and the unquoted expansion drops that
        # empty element instead of keeping it as entry 0.

        # Maintenance Processes - SM managed
        "mtcAgent")
            sm_managed_processes=( ${sm_managed_processes[@]} "mtcAgent:0:mtc-agent" )
            ;;
        "guestAgent")
            sm_managed_processes=( ${sm_managed_processes[@]} "guestAgent:0:guest-agent" )
            ;;
        "hwmond")
            sm_managed_processes=( ${sm_managed_processes[@]} "hwmond:0:hw-mon" )
            ;;

        # Maintenance Processes - PMON managed
        "pmond"|"guestServer"|"hbsAgent"|"mtcClient"|"hbsClient"|\
        "hostwd"|"fsmond"|"mtclogd"|"mtcalarmd"|"lmond")
            pmon_managed_processes=( ${pmon_managed_processes[@]} "${1}:0" )
            ;;

        *)
            loginfo "Unknown process:${1}"
            ;;
    esac
}

# Numeric test on the argument count; '>' inside [[ ]] compares strings.
while (( ${#} > 0 ))
do
    process="${1}"
    mtce_restart_add_process "${process}"
    shift
done
|
|
|
|
# Completion flags, one per process group. Both start out 'true' (nothing to
# wait for); a restart section sets its flag to 'false' when it issues a
# restart.
pmon_done=true
sm_done=true
|
|
|
|
# Restart the requested SM managed processes.
#
# Nothing is done unless at least one SM managed process was requested and
# is_controller succeeds. For each requested process:
#   - with --clean, its '.restarted' flag file is removed first
#   - a process whose flag file exists is marked 'skipped'
#   - a running process whose service is reported 'enabled-active' by
#     ${SM_QUERY_EXEC} gets its flag file created and is restarted with
#     ${SM_RESTART_EXEC}; sm_done is set to false so that the restart is
#     monitored afterwards
#   - anything else is marked 'not-running' or 'disabled'
# The pid or status learned is written back into sm_managed_processes.
#
# NOTE: is_controller has to be run as a command. Written inside '[ ... ]'
# it is just a non-empty string, which made the old test always true.
# is_controller is not defined in this file; it is presumably provided by
# /etc/patching/patch-functions - confirm.
mtce_restart_sm_processes()
{
    local entry name svc_alias pid query_result
    local -i index
    local -a info

    [ -n "${sm_managed_processes}" ] || return 0
    is_controller || return 0

    # Record current process IDs
    index=0
    for entry in "${sm_managed_processes[@]}"
    do
        info=(${entry//:/ })
        name="${info[${PROCESS_INDEX}]}"
        svc_alias="${info[${ALIAS_INDEX}]}"

        if [ "${CLEAN}" = true ] ; then
            rm -f "${PATCH_FLAGDIR}/${name}.restarted"
        fi

        pid=$(pidof ${name})
        if [ -z "${pid}" ] ; then
            loginfo "${name} is not running"
            pid="${NOPID}"
        fi

        # Save the PID or NOPID status to the process line
        sm_managed_processes[${index}]="${name}:${pid}:${svc_alias}"
        ((index++))
    done

    # Restart the processes
    index=0
    for entry in "${sm_managed_processes[@]}"
    do
        info=(${entry//:/ })
        name="${info[${PROCESS_INDEX}]}"
        svc_alias="${info[${ALIAS_INDEX}]}"

        if [ -e "${PATCH_FLAGDIR}/${name}.restarted" ] ; then
            # Already restarted by an earlier run ; leave it alone
            pid="${SKIPPED}"
        else
            query_result=$(${SM_QUERY_EXEC} service ${svc_alias})
            if [[ "${query_result}" == *"enabled-active"* ]] ; then

                # Save the original PID
                pid=$(pidof ${name})
                if [ -n "${pid}" ] ; then
                    loginfo "sm-restart of ${name} [pid:${pid}]"
                    touch "${PATCH_FLAGDIR}/${name}.restarted"
                    ${SM_RESTART_EXEC} service "${svc_alias}"
                    sm_done=false
                    sleep 5
                else
                    loginfo "${name} is not running ; must be on inactive controller"
                    pid="${NOPID}"
                fi

            elif [[ "${query_result}" == *"is enabling"* ]] ; then
                pid="${NOPID}"
                loginfo "sm-restart ${name} ; [in progress] ; [pid:${pid}]"
            else
                pid="${DISABLED}"
                loginfo "${name} is not active"
            fi
        fi

        # Save the PID or status to the process line
        sm_managed_processes[${index}]="${name}:${pid}:${svc_alias}"
        ((index++))
    done
}

mtce_restart_sm_processes
|
|
|
|
# Restart the requested PMON managed processes.
#
# Per process: with --clean the '.restarted' flag file is removed first; a
# process whose flag file exists is marked 'skipped'; a process without a pid
# is marked 'disabled'; otherwise the flag file is created, pmon-restart is
# issued and pmon_done is set to false so the restart gets monitored.
if [[ -n "${pmon_managed_processes}" ]] ; then

    echo "DEBUG: pmon_managed_processes:${pmon_managed_processes}"

    # Walk the list by position so each entry can be rewritten in place.
    for (( index = 0 ; index < ${#pmon_managed_processes[@]} ; index++ ))
    do
        DAEMON="${pmon_managed_processes[${index}]}"
        info=(${DAEMON//:/ })

        if [ "${CLEAN}" = true ] ; then
            rm -f $PATCH_FLAGDIR/${info[${PROCESS_INDEX}]}.restarted
        fi

        if [ -e $PATCH_FLAGDIR/${info[${PROCESS_INDEX}]}.restarted ] ; then
            # Restarted by an earlier run ; nothing more to do for this one
            info[${PID_INDEX}]="${SKIPPED}"
        else
            # Save the original PID
            info[${PID_INDEX}]=$(pidof ${info[${PROCESS_INDEX}]})

            if [ -z "${info[${PID_INDEX}]}" ] ; then
                info[${PID_INDEX}]="${DISABLED}"
                loginfo "${info[${PROCESS_INDEX}]} is not active"
            else
                loginfo "pmon-restart of ${info[${PROCESS_INDEX}]} [pid:${info[${PID_INDEX}]}]"
                touch $PATCH_FLAGDIR/${info[${PROCESS_INDEX}]}.restarted
                ${PMON_RESTART_EXEC} ${info[${PROCESS_INDEX}]}
                pmon_done=false
                sleep 2

                ############################################################
                # Special Handling Section
                #
                # - pmond needs 30 seconds to restart before it will start
                #   monitoring processes. We can maybe remove that in the
                #   daemon config file but for now its there and we have
                #   to wait.
                ############################################################
                if [ "${info[${PROCESS_INDEX}]}" == "pmond" ] ; then
                    sleep 30
                fi
            fi
        fi

        # Save the PID or other status to the process line
        pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"
    done
fi
|
|
|
|
# Early exit. When neither restart section issued a restart both flags are
# still 'true' and there is nothing to monitor: log both process lists and
# leave with the OK status.
if [ "${sm_done}" = true ] && [ "${pmon_done}" = true ] ; then
    GLOBAL_RC=$PATCH_STATUS_OK
    # [*] keeps each message a single argument ; [@] inside the quotes would
    # split it into one argument per list entry.
    loginfo " SM Processes: ${sm_managed_processes[*]}"
    loginfo "PMON Processes: ${pmon_managed_processes[*]}"
    loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC} - nothing to do."
    exit ${GLOBAL_RC}
fi
|
|
|
|
# Monitor the restart of the processes.
#
# Time is measured from this point rather than from the start of the shell:
# the SECONDS counter is reset to zero and the deadline is placed TIMEOUT
# seconds after it.
TIMEOUT=120
SECONDS=0
UNTIL=$(( SECONDS + TIMEOUT ))
loginfo "restart timeout is ${TIMEOUT}"
|
|
|
|
# Log the outcome of one process group once its monitoring has finished.
#   $1   - group label used in the log text ; 'sm managed' or 'pmon managed'
#   $2.. - the group's process struct entries ("name:status[:alias]")
# Lists the processes whose status is 'restarted' and then those that are
# 'skipped'. Each heading is logged only if a process falls under it.
mtce_restart_log_summary()
{
    local group="${1}"
    shift

    local entry logged
    local -a fields

    logged=false
    for entry in "$@"
    do
        fields=(${entry//:/ })
        if [ "${fields[${PID_INDEX}]}" == "${RESTARTED}" ] ; then
            if [ "${logged}" = false ] ; then
                loginfo "The following '${group}' processes have been 'restarted'"
                logged=true
            fi
            loginfo "... process: ${fields[${PROCESS_INDEX}]}"
        fi
    done

    logged=false
    for entry in "$@"
    do
        fields=(${entry//:/ })
        if [ "${fields[${PID_INDEX}]}" == "${SKIPPED}" ] ; then
            if [ "${logged}" = false ] ; then
                loginfo "The following '${group}' processes have been 'skipped' ; due to previous restart"
                logged=true
            fi
            loginfo "... process: ${fields[${PROCESS_INDEX}]}"
        fi
    done
}

# Poll once a second until both groups are done or the deadline passes.
# GLOBAL_RC only becomes the OK status when both groups complete in time.
while [ "${UNTIL}" -ge "${SECONDS}" ]
do
    if [ "${sm_done}" = false ] ; then

        # Assume all SM restarts are complete until one is found pending
        sm_not_done=false

        # is_controller / is_cpe have to be run as commands. Written inside
        # '[ ... ]' they are plain non-empty strings and the test is always
        # true. They are not defined in this file ; presumably they come
        # from /etc/patching/patch-functions - confirm.
        if is_controller || is_cpe ; then
            index=0
            for DAEMON in "${sm_managed_processes[@]}"
            do
                info=(${DAEMON//:/ })

                # Don't waste time on processes that were skipped, have
                # already restarted, are disabled or are not running.
                case "${info[${PID_INDEX}]}" in
                    "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                        ((index++))
                        continue
                        ;;
                esac

                if [[ $(${SM_QUERY_EXEC} service ${info[${ALIAS_INDEX}]}) == *"enabled-active"* ]] ; then

                    # Restarted once the service is active with a new pid
                    if new_pid=$(pidof ${info[${PROCESS_INDEX}]}) && [ -n "${new_pid}" ] ; then
                        if [ "${info[${PID_INDEX}]}" != "${new_pid}" ] ; then
                            loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [pid:${info[${PID_INDEX}]} -> ${new_pid}]"
                            info[${PID_INDEX}]="${RESTARTED}"
                        fi
                    fi
                fi

                # Not done as long as there is one process not restarted
                if [ "${info[${PID_INDEX}]}" != "${RESTARTED}" ] ; then
                    sm_not_done=true
                fi

                # Save the PID or status to the process line
                sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"

                ((index++))
            done
        fi

        # When the SM restarts are done print a summary ; only once
        if [ "${sm_not_done}" = false ] ; then
            sm_done=true
            mtce_restart_log_summary "sm managed" "${sm_managed_processes[@]}"
        fi
    fi

    #########################################################################
    # For all nodes ....
    #########################################################################

    # Loop over all PMON processes looking for complete restarts.
    # Update process struct PID field as status is learned.

    if [ "${pmon_done}" = false ] ; then

        # Assume all PMON restarts are complete until one is found pending
        pmon_not_done=false
        index=0
        for DAEMON in "${pmon_managed_processes[@]}"
        do
            info=(${DAEMON//:/ })

            # Don't waste time on processes that were skipped, have
            # already restarted, are disabled or are not running.
            case "${info[${PID_INDEX}]}" in
                "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                    ((index++))
                    continue
                    ;;
            esac

            # Set the process as restarted as soon as it runs with a pid
            # that differs from the one recorded before the restart ; the
            # unchanged pid means the old instance is still running.
            if new_pid=$(pidof ${info[${PROCESS_INDEX}]}) && [ -n "${new_pid}" ] ; then
                if [ "${info[${PID_INDEX}]}" != "${new_pid}" ] ; then
                    loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [PID: ${info[${PID_INDEX}]} -> ${new_pid}]"
                    info[${PID_INDEX}]="${RESTARTED}"
                fi
            fi

            # Set not done as long as there is one process not restarted
            if [ "${info[${PID_INDEX}]}" != "${RESTARTED}" ] ; then
                pmon_not_done=true
            fi

            # Save the PID or status to the process line
            pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"

            ((index++))
        done
    fi

    # When all pmond restarts are done print a summary ; only once
    if [ "${pmon_not_done}" = false ] && [ "${pmon_done}" = false ] ; then
        pmon_done=true
        mtce_restart_log_summary "pmon managed" "${pmon_managed_processes[@]}"
    fi

    # check for done. If this is not met in timeout then fail is returned
    if [ "${sm_done}" = true ] && [ "${pmon_done}" = true ] ; then
        GLOBAL_RC=$PATCH_STATUS_OK
        break
    fi

    sleep 1
done

loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC}"

exit ${GLOBAL_RC}
|