Merge "Change ceph-init-wrapper wait logic"

This commit is contained in:
Zuul 2019-07-31 20:15:54 +00:00 committed by Gerrit Code Review
commit 1f20a61df5

View File

@ -40,7 +40,8 @@ source /etc/platform/platform.conf
CEPH_SCRIPT="/etc/init.d/ceph" CEPH_SCRIPT="/etc/init.d/ceph"
CEPH_FILE="$VOLATILE_PATH/.ceph_started" CEPH_FILE="$VOLATILE_PATH/.ceph_started"
CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting" CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
CEPH_GET_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_status" CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt" CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
BINDIR=/usr/bin BINDIR=/usr/bin
@ -59,10 +60,13 @@ mkdir -p $DATA_PATH # make sure folder exists
MONITORING_INTERVAL=15 MONITORING_INTERVAL=15
TRACE_LOOP_INTERVAL=5 TRACE_LOOP_INTERVAL=5
GET_STATUS_TIMEOUT=120 GET_OSD_STATUS_TIMEOUT=120
GET_MONITOR_STATUS_TIMEOUT=30
CEPH_STATUS_TIMEOUT=20 CEPH_STATUS_TIMEOUT=20
WAIT_FOR_CMD=1 WAIT_FOR_CMD=1
MONITOR_COMMAND=0
OSD_COMMAND=0
RC=0 RC=0
@ -73,24 +77,65 @@ if [ ! -z $ARGS ]; then
args+=("${new_args[@]}") args+=("${new_args[@]}")
fi fi
check_command_type ()
{
    # Record which daemon class the current request targets by setting the
    # global flags MONITOR_COMMAND and/or OSD_COMMAND to 1.
    #
    #   no argument  -> both flags are set
    #   $1 = osd*    -> OSD_COMMAND only
    #   $1 = mon*    -> MONITOR_COMMAND only
    #   anything else terminates the script with exit status 1
    if [[ $# -eq 0 ]]; then
        MONITOR_COMMAND=1
        OSD_COMMAND=1
        return
    fi

    case "$1" in
        osd*)
            OSD_COMMAND=1
            ;;
        mon*)
            MONITOR_COMMAND=1
            ;;
        *)
            exit 1
            ;;
    esac
}
wait_for_status ()
{
    # Block until every in-progress status query relevant to the current
    # command has finished, or until the applicable timeout expires.
    #
    # Globals read:
    #   MONITOR_COMMAND, OSD_COMMAND      - which daemon classes are targeted
    #   CEPH_GET_MON_STATUS_FILE,
    #   CEPH_GET_OSD_STATUS_FILE          - checkpoint files that exist while
    #                                       a status query is running
    #   GET_MONITOR_STATUS_TIMEOUT,
    #   GET_OSD_STATUS_TIMEOUT            - maximum wait, in seconds
    #
    # On timeout a warning is logged through wlog and the checkpoint files
    # for the targeted daemon classes are removed so the caller can continue.
    local STATUS_TIMEOUT=0
    local timeout_expiry
    local pending

    # For a general "ceph status" command, which includes checks for both
    # monitors and OSDs, we use the OSD timeout.
    if [[ $OSD_COMMAND == 1 ]]; then
        STATUS_TIMEOUT=$GET_OSD_STATUS_TIMEOUT
    elif [[ $MONITOR_COMMAND == 1 ]]; then
        STATUS_TIMEOUT=$GET_MONITOR_STATUS_TIMEOUT
    fi

    timeout_expiry=$((SECONDS + STATUS_TIMEOUT))

    while true; do
        # A status query is pending while any checkpoint file belonging to
        # a targeted daemon class still exists; when both classes are
        # targeted we must wait for both files to disappear.
        pending=0
        if [[ $MONITOR_COMMAND == 1 && -f "${CEPH_GET_MON_STATUS_FILE}" ]]; then
            pending=1
        fi
        if [[ $OSD_COMMAND == 1 && -f "${CEPH_GET_OSD_STATUS_FILE}" ]]; then
            pending=1
        fi

        if [[ $pending == 0 ]] || [[ ${SECONDS} -gt ${timeout_expiry} ]]; then
            break
        fi

        sleep 1
    done

    # Leaving the loop with a query still pending means the deadline passed.
    if [[ $pending == 1 ]]; then
        wlog "-" "WARN" "Getting status takes more than ${STATUS_TIMEOUT}s, continuing"
        if [[ $MONITOR_COMMAND == 1 ]]; then
            rm -f "${CEPH_GET_MON_STATUS_FILE}"
        fi
        if [[ $OSD_COMMAND == 1 ]]; then
            rm -f "${CEPH_GET_OSD_STATUS_FILE}"
        fi
    fi
}
start () start ()
{ {
if [ -f ${CEPH_FILE} ]; then if [ -f ${CEPH_FILE} ]; then
wlog "-" INFO "Ceph START $1 command received"
wait_for_status wait_for_status
${CEPH_SCRIPT} start $1 ${CEPH_SCRIPT} start $1
wlog "-" INFO "Ceph START $1 command finished."
RC=$? RC=$?
else else
# Ceph is not running on this node, return success # Ceph is not running on this node, return success
@ -100,17 +145,21 @@ start ()
stop ()
{
    # Stop the Ceph daemons selected by the optional argument $1 by
    # delegating to ${CEPH_SCRIPT}, after waiting for any in-progress status
    # query to finish.
    #
    # Arguments: $1 - optional daemon selector, forwarded to ${CEPH_SCRIPT}
    # Returns:   the exit status of "${CEPH_SCRIPT} stop"
    local rc

    wlog "-" INFO "Ceph STOP $1 command received."
    wait_for_status
    # ${1:+"$1"} forwards the selector as exactly one word and forwards
    # nothing at all when it is empty or unset.
    "${CEPH_SCRIPT}" stop ${1:+"$1"}
    rc=$?
    wlog "-" INFO "Ceph STOP $1 command finished."
    # Report the init script's status rather than that of the log call.
    return $rc
}
restart () restart ()
{ {
if [ -f ${CEPH_FILE} ]; then if [ -f ${CEPH_FILE} ]; then
wlog "-" INFO "Ceph RESTART $1 command received."
wait_for_status wait_for_status
touch $CEPH_RESTARTING_FILE touch $CEPH_RESTARTING_FILE
${CEPH_SCRIPT} restart $1 ${CEPH_SCRIPT} restart $1
rm -f $CEPH_RESTARTING_FILE rm -f $CEPH_RESTARTING_FILE
wlog "-" INFO "Ceph RESTART $1 command finished."
else else
# Ceph is not running on this node, return success # Ceph is not running on this node, return success
exit 0 exit 0
@ -170,14 +219,13 @@ log_and_kill_hung_procs ()
done done
} }
status () status ()
{ {
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
timeout $CEPH_STATUS_TIMEOUT ceph -s timeout $CEPH_STATUS_TIMEOUT ceph -s
if [ "$?" -ne 0 ]; then if [ "$?" -ne 0 ]; then
# Ceph cluster is not accessible. Don't panic, controller swact # Ceph cluster is not accessible. Don't panic, controller swact
# may be in progress. # may be in progress.
wlog "-" INFO "Ceph is down, ignoring OSD status." wlog "-" INFO "Ceph is down, ignoring OSD status."
exit 0 exit 0
fi fi
@ -191,7 +239,14 @@ status ()
if [ -f ${CEPH_FILE} ]; then if [ -f ${CEPH_FILE} ]; then
# Make sure the script does not 'exit' between here and the 'rm -f' below # Make sure the script does not 'exit' between here and the 'rm -f' below
# or the checkpoint file will be left behind # or the checkpoint file will be left behind
touch -f ${CEPH_GET_STATUS_FILE} if [[ $MONITOR_COMMAND == 1 ]]; then
touch -f ${CEPH_GET_MON_STATUS_FILE}
fi
if [[ $OSD_COMMAND == 1 ]]; then
touch -f ${CEPH_GET_OSD_STATUS_FILE}
fi
result=`${CEPH_SCRIPT} status $1` result=`${CEPH_SCRIPT} status $1`
RC=$? RC=$?
if [ "$RC" -ne 0 ]; then if [ "$RC" -ne 0 ]; then
@ -236,7 +291,13 @@ status ()
fi fi
fi fi
rm -f ${CEPH_GET_STATUS_FILE} if [[ $MONITOR_COMMAND == 1 ]]; then
rm -f ${CEPH_GET_MON_STATUS_FILE}
fi
if [[ $OSD_COMMAND == 1 ]]; then
rm -f ${CEPH_GET_OSD_STATUS_FILE}
fi
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# SM needs exit code != 0 from 'status mon' argument of the init script on # SM needs exit code != 0 from 'status mon' argument of the init script on
@ -262,15 +323,19 @@ status ()
case "${args[0]}" in case "${args[0]}" in
start) start)
check_command_type ${args[1]}
start ${args[1]} start ${args[1]}
;; ;;
stop) stop)
check_command_type ${args[1]}
stop ${args[1]} stop ${args[1]}
;; ;;
restart) restart)
check_command_type ${args[1]}
restart ${args[1]} restart ${args[1]}
;; ;;
status) status)
check_command_type ${args[1]}
status ${args[1]} status ${args[1]}
;; ;;
*) *)