Merge "Change ceph-init-wrapper wait logic"
This commit is contained in:
commit
1f20a61df5
@ -40,7 +40,8 @@ source /etc/platform/platform.conf
|
|||||||
CEPH_SCRIPT="/etc/init.d/ceph"
|
CEPH_SCRIPT="/etc/init.d/ceph"
|
||||||
CEPH_FILE="$VOLATILE_PATH/.ceph_started"
|
CEPH_FILE="$VOLATILE_PATH/.ceph_started"
|
||||||
CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
|
CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
|
||||||
CEPH_GET_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_status"
|
CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
|
||||||
|
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
|
||||||
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
|
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
|
||||||
|
|
||||||
BINDIR=/usr/bin
|
BINDIR=/usr/bin
|
||||||
@ -59,10 +60,13 @@ mkdir -p $DATA_PATH # make sure folder exists
|
|||||||
|
|
||||||
MONITORING_INTERVAL=15
|
MONITORING_INTERVAL=15
|
||||||
TRACE_LOOP_INTERVAL=5
|
TRACE_LOOP_INTERVAL=5
|
||||||
GET_STATUS_TIMEOUT=120
|
GET_OSD_STATUS_TIMEOUT=120
|
||||||
|
GET_MONITOR_STATUS_TIMEOUT=30
|
||||||
CEPH_STATUS_TIMEOUT=20
|
CEPH_STATUS_TIMEOUT=20
|
||||||
|
|
||||||
WAIT_FOR_CMD=1
|
WAIT_FOR_CMD=1
|
||||||
|
MONITOR_COMMAND=0
|
||||||
|
OSD_COMMAND=0
|
||||||
|
|
||||||
RC=0
|
RC=0
|
||||||
|
|
||||||
@ -73,24 +77,65 @@ if [ ! -z $ARGS ]; then
|
|||||||
args+=("${new_args[@]}")
|
args+=("${new_args[@]}")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Classify which daemon type the current command targets.
#
# Sets the MONITOR_COMMAND and/or OSD_COMMAND globals to 1; these flags
# decide which status checkpoint files and which timeout are used by the
# rest of the wrapper.
#
# Arguments:
#   $1 - optional daemon type or name starting with "mon" or "osd"
#        (e.g. "mon", "osd", "osd.0").  When no argument is passed the
#        command is treated as targeting both monitors and OSDs.
# Exits:
#   Terminates the script with status 1 when the argument is neither a
#   monitor nor an OSD target.
check_command_type ()
{
    # No target given: the command covers monitors and OSDs alike.
    if [[ $# -eq 0 ]]; then
        MONITOR_COMMAND=1
        OSD_COMMAND=1
        return 0
    fi

    case "$1" in
        osd*)
            OSD_COMMAND=1
            ;;
        mon*)
            MONITOR_COMMAND=1
            ;;
        *)
            # Say why we are bailing out instead of exiting silently.
            echo "Unsupported ceph daemon type '$1' (expected mon* or osd*)" >&2
            exit 1
            ;;
    esac
}
|
||||||
|
|
||||||
# Wait for an in-flight "status" invocation to finish before acting.
#
# status() creates a checkpoint file while it queries the daemons (one
# file for monitors, one for OSDs) and removes it afterwards.  This
# function polls once per second until every checkpoint file relevant to
# the current command (selected by MONITOR_COMMAND / OSD_COMMAND) is gone,
# or until the applicable timeout has elapsed.  On timeout a warning is
# logged and the relevant checkpoint files are removed so that the
# command can proceed.
#
# Globals read:
#   MONITOR_COMMAND, OSD_COMMAND
#   GET_MONITOR_STATUS_TIMEOUT, GET_OSD_STATUS_TIMEOUT
#   CEPH_GET_MON_STATUS_FILE, CEPH_GET_OSD_STATUS_FILE
wait_for_status ()
{
    local STATUS_TIMEOUT=0
    local timeout_expiry
    local status_pending
    local timed_out=1

    # For a general "ceph status" command which includes checks
    # for both monitors and OSDS, we use the OSD timeout.
    if [[ $OSD_COMMAND == 1 ]]; then
        STATUS_TIMEOUT=$GET_OSD_STATUS_TIMEOUT
    elif [[ $MONITOR_COMMAND == 1 ]]; then
        STATUS_TIMEOUT=$GET_MONITOR_STATUS_TIMEOUT
    fi

    timeout_expiry=$(( SECONDS + STATUS_TIMEOUT ))
    while [ "${SECONDS}" -le "${timeout_expiry}" ]; do
        # A status check is still running if any checkpoint file that
        # matters for this command exists; for a general command that
        # means both the monitor and the OSD file must be gone.
        status_pending=0
        if [[ $MONITOR_COMMAND == 1 ]] && [[ -f ${CEPH_GET_MON_STATUS_FILE} ]]; then
            status_pending=1
        fi
        if [[ $OSD_COMMAND == 1 ]] && [[ -f ${CEPH_GET_OSD_STATUS_FILE} ]]; then
            status_pending=1
        fi

        if [[ $status_pending == 0 ]]; then
            timed_out=0
            break
        fi

        sleep 1
    done

    # The loop ran to its deadline without the checkpoint(s) clearing:
    # warn, then drop the stale files so later commands are not blocked.
    if [[ $timed_out == 1 ]]; then
        wlog "-" "WARN" "Getting status takes more than ${STATUS_TIMEOUT}s, continuing"
        if [[ $MONITOR_COMMAND == 1 ]]; then
            rm -f "$CEPH_GET_MON_STATUS_FILE"
        fi

        if [[ $OSD_COMMAND == 1 ]]; then
            rm -f "$CEPH_GET_OSD_STATUS_FILE"
        fi
    fi
}
|
||||||
|
|
||||||
start ()
|
start ()
|
||||||
{
|
{
|
||||||
if [ -f ${CEPH_FILE} ]; then
|
if [ -f ${CEPH_FILE} ]; then
|
||||||
|
wlog "-" INFO "Ceph START $1 command received"
|
||||||
wait_for_status
|
wait_for_status
|
||||||
${CEPH_SCRIPT} start $1
|
${CEPH_SCRIPT} start $1
|
||||||
|
wlog "-" INFO "Ceph START $1 command finished."
|
||||||
RC=$?
|
RC=$?
|
||||||
else
|
else
|
||||||
# Ceph is not running on this node, return success
|
# Ceph is not running on this node, return success
|
||||||
@ -100,17 +145,21 @@ start ()
|
|||||||
|
|
||||||
# Stop Ceph daemons through the underlying init script.
#
# Waits for any in-flight status check to finish (wait_for_status), then
# runs "${CEPH_SCRIPT} stop", logging before and after the call.
#
# Arguments:
#   $1 - optional daemon type/name forwarded to the init script; omitted
#        from the init script's command line when empty.
# Returns:
#   The exit status of the init script's "stop" command.
stop ()
{
    local rc

    wlog "-" INFO "Ceph STOP $1 command received."
    wait_for_status
    # ${1:+...} forwards the target only when one was given, so an empty
    # argument is never passed down to the init script.
    "${CEPH_SCRIPT}" stop ${1:+"$1"}
    rc=$?
    wlog "-" INFO "Ceph STOP $1 command finished."

    # Report the init script's result rather than the logger's status.
    return $rc
}
|
||||||
|
|
||||||
restart ()
|
restart ()
|
||||||
{
|
{
|
||||||
if [ -f ${CEPH_FILE} ]; then
|
if [ -f ${CEPH_FILE} ]; then
|
||||||
|
wlog "-" INFO "Ceph RESTART $1 command received."
|
||||||
wait_for_status
|
wait_for_status
|
||||||
touch $CEPH_RESTARTING_FILE
|
touch $CEPH_RESTARTING_FILE
|
||||||
${CEPH_SCRIPT} restart $1
|
${CEPH_SCRIPT} restart $1
|
||||||
rm -f $CEPH_RESTARTING_FILE
|
rm -f $CEPH_RESTARTING_FILE
|
||||||
|
wlog "-" INFO "Ceph RESTART $1 command finished."
|
||||||
else
|
else
|
||||||
# Ceph is not running on this node, return success
|
# Ceph is not running on this node, return success
|
||||||
exit 0
|
exit 0
|
||||||
@ -170,14 +219,13 @@ log_and_kill_hung_procs ()
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
status ()
|
status ()
|
||||||
{
|
{
|
||||||
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
|
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
|
||||||
timeout $CEPH_STATUS_TIMEOUT ceph -s
|
timeout $CEPH_STATUS_TIMEOUT ceph -s
|
||||||
if [ "$?" -ne 0 ]; then
|
if [ "$?" -ne 0 ]; then
|
||||||
# Ceph cluster is not accessible. Don't panic, controller swact
|
# Ceph cluster is not accessible. Don't panic, controller swact
|
||||||
# may be in progress.
|
# may be in progress.
|
||||||
wlog "-" INFO "Ceph is down, ignoring OSD status."
|
wlog "-" INFO "Ceph is down, ignoring OSD status."
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
@ -191,7 +239,14 @@ status ()
|
|||||||
if [ -f ${CEPH_FILE} ]; then
|
if [ -f ${CEPH_FILE} ]; then
|
||||||
# Make sure the script does not 'exit' between here and the 'rm -f' below
|
# Make sure the script does not 'exit' between here and the 'rm -f' below
|
||||||
# or the checkpoint file will be left behind
|
# or the checkpoint file will be left behind
|
||||||
touch -f ${CEPH_GET_STATUS_FILE}
|
if [[ $MONITOR_COMMAND == 1 ]]; then
|
||||||
|
touch -f ${CEPH_GET_MON_STATUS_FILE}
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $OSD_COMMAND == 1 ]]; then
|
||||||
|
touch -f ${CEPH_GET_OSD_STATUS_FILE}
|
||||||
|
fi
|
||||||
|
|
||||||
result=`${CEPH_SCRIPT} status $1`
|
result=`${CEPH_SCRIPT} status $1`
|
||||||
RC=$?
|
RC=$?
|
||||||
if [ "$RC" -ne 0 ]; then
|
if [ "$RC" -ne 0 ]; then
|
||||||
@ -236,7 +291,13 @@ status ()
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
rm -f ${CEPH_GET_STATUS_FILE}
|
if [[ $MONITOR_COMMAND == 1 ]]; then
|
||||||
|
rm -f ${CEPH_GET_MON_STATUS_FILE}
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $OSD_COMMAND == 1 ]]; then
|
||||||
|
rm -f ${CEPH_GET_OSD_STATUS_FILE}
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
|
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
|
||||||
# SM needs exit code != 0 from 'status mon' argument of the init script on
|
# SM needs exit code != 0 from 'status mon' argument of the init script on
|
||||||
@ -262,15 +323,19 @@ status ()
|
|||||||
|
|
||||||
case "${args[0]}" in
|
case "${args[0]}" in
|
||||||
start)
|
start)
|
||||||
|
check_command_type ${args[1]}
|
||||||
start ${args[1]}
|
start ${args[1]}
|
||||||
;;
|
;;
|
||||||
stop)
|
stop)
|
||||||
|
check_command_type ${args[1]}
|
||||||
stop ${args[1]}
|
stop ${args[1]}
|
||||||
;;
|
;;
|
||||||
restart)
|
restart)
|
||||||
|
check_command_type ${args[1]}
|
||||||
restart ${args[1]}
|
restart ${args[1]}
|
||||||
;;
|
;;
|
||||||
status)
|
status)
|
||||||
|
check_command_type ${args[1]}
|
||||||
status ${args[1]}
|
status ${args[1]}
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user