Fix Ceph mon and osd processes start/stop conditions

For AIO-DX, Ceph monitor was not being started after an uncontrolled
swact caused by sudden power off/reboot of the active controller,
breaking the system high availability. This happens because there is a
flag to indicate on which controller the last active ceph monitor was
running to prevent starting ceph monitor without drbd-cephmon data in
sync, which could cause Ceph data corruption. That flag was also
avoiding data corruption caused when mgmt network was down and both
controllers were set to be active, starting ceph monitor without
drbd-cephmon in sync.

To prevent data corruption and to maintain system high availability,
this fix checks the mgmt network carrier instead of managing flags.
If no carrier is detected on mgmt network interface, then ceph mon and
osd are stopped and only allowed to start again after mgmt network has
carrier.

For AIO-DX Direct, all networks are also verified. If none of the
networks has carrier, then the other controller is considered down,
letting the working controller stay in the active state even if the
mgmt network has no carrier.

Test-Plan:
  PASS: Run system host-swact on AIO-DX and verify ceph is running
        with status HEALTH_OK
  PASS: Force an uncontrolled swact on AIO-DX by killing a critical
        process and verify if ceph is running with status HEALTH_OK
  PASS: Disconnect OAM and MGMT networks for both controllers on
        AIO-DX and verify ceph mon and osd stop on both controllers.
        Reconnect OAM and MGMT networks and verify if ceph is running
        and status is HEALTH_OK
  PASS: Reboot or power off active controller and verify on the other
        controller if ceph is running with status HEALTH_WARN because
        one host is down. Power on the controller, wait until it is
        online/available. Verify ceph is HEALTH_OK after all OSDs are
        up and data is recovered.

Closes-bug: 2020889

Signed-off-by: Felipe Sanches Zanoni <Felipe.SanchesZanoni@windriver.com>
Change-Id: I38470f43eba86f88fb9cfe47869d2393cacbd365
This commit is contained in:
Felipe Sanches Zanoni 2023-05-30 16:51:59 -03:00
parent 0aa36aeaad
commit 655ab05b71

View File

@ -42,16 +42,7 @@ CEPH_FILE="$VOLATILE_PATH/.ceph_started"
CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
# For All-in-one duplex, set some variables.
# These are paths of marker files kept under the ceph mon lib directory;
# they are only defined when system_type/system_mode match AIO duplex.
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
# Base directory that holds the marker files below
CEPH_MON_LIB_PATH=/var/lib/ceph/mon
# One "last active ceph mon" marker path per controller
CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_0"
CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_1"
# Marker path for this host: ${HOSTNAME/-/_} replaces the first '-' in
# HOSTNAME with '_' (presumably controller-0 -> controller_0, so that it
# matches one of the two paths above -- verify against deployed hostnames)
CEPH_LAST_ACTIVE_CONTROLLER_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_${HOSTNAME/-/_}"
# Marker path used to record that ceph mon was shut down
CEPH_MON_SHUTDOWN_COMPLETE="${CEPH_MON_LIB_PATH}/.ceph_mon_shutdown_complete"
fi
BINDIR=/usr/bin
SBINDIR=/usr/sbin
@ -95,18 +86,6 @@ if [ ! -z $ARGS ]; then
args+=("${new_args[@]}")
fi
# Report whether drbd-cephmon is in sync, based on 'drbdadm dstate' output.
# The disk state is logged through wlog on every call.
# Return 0 when the state is exactly "UpToDate/UpToDate", 1 otherwise.
is_drbd_cephmon_in_sync ()
{
    local dstate
    dstate=$(drbdadm dstate drbd-cephmon)
    wlog "-" INFO "drbd-cephmon status: ${dstate}"

    # Anything other than both sides UpToDate counts as not ready
    case "${dstate}" in
        "UpToDate/UpToDate")
            return 0
            ;;
        *)
            return 1
            ;;
    esac
}
# Verify if drbd-cephmon role is primary, checking the output of 'drbdadm role'
# Return 0 on success and 1 if drbd-cephmon is not primary
is_drbd_cephmon_primary ()
@ -133,6 +112,43 @@ is_drbd_cephmon_mounted ()
return 1
}
# Verify whether the oam, cluster host and mgmt networks all lack carrier.
# This is a special condition for the AIO-DX Direct setup.
# If none of the networks has carrier, then the other host is down.
# When the other host is down, ceph must start on this host.
# Globals: oam_interface, cluster_host_interface, management_interface (read)
# Return 0 if no carrier is detected on all network interfaces.
# Return 1 if carrier has been detected on at least one network interface.
has_all_network_no_carrier()
{
    local iface

    for iface in "${oam_interface}" "${cluster_host_interface}" "${management_interface}"; do
        # grep -q: only the exit status matters; this keeps the matched
        # 'ip link' line from being echoed to this script's stdout.
        if ! ip link show "${iface}" | grep -q NO-CARRIER; then
            # This interface does not report NO-CARRIER
            return 1
        fi
    done

    # All networks have no carrier, meaning the other host is down
    wlog "-" INFO "No carrier detected from all network interfaces"
    return 0
}
# Check mgmt network carrier signal (cable connected).
# Globals: management_interface (read)
# Return 0 if the management interface does not report NO-CARRIER.
# Return 1 if 'ip link show' reports NO-CARRIER for it, meaning the
# interface has no physical link.
has_mgmt_network_carrier()
{
    # grep -q: only the exit status is needed; do not echo the matched
    # 'ip link' line to this script's stdout.
    if ip link show "${management_interface}" | grep -q NO-CARRIER; then
        wlog "-" INFO "management interface '${management_interface}' has NO-CARRIER, cannot start ceph mon"
        return 1
    fi

    wlog "-" INFO "management interface '${management_interface}' is working"
    return 0
}
# Verify if ceph mon can be started on AIO-DX configuration.
# This function must be called only on AIO-DX.
# Return 0 on success and 1 if ceph mon cannot be started
@ -172,39 +188,6 @@ can_start_ceph_mon ()
return 1
fi
# Ceph mon was last active in this controller. Can run safely.
if [ -f "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" ]; then
return 0
fi
# Check if last active ceph-mon was in another controller
if [ "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" == "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}" ]; then
local CEPH_OTHER_ACTIVE_CONTROLLER_FLAG="${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
else
local CEPH_OTHER_ACTIVE_CONTROLLER_FLAG="${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
fi
if [ -f "${CEPH_OTHER_ACTIVE_CONTROLLER_FLAG}" ]; then
if [ -f "${CEPH_MON_SHUTDOWN_COMPLETE}" ]; then
return 0
fi
# Verify drbd-cephmon status
for times in {9..0}; do
is_drbd_cephmon_in_sync
if [ $? -eq 0 ]; then
# drbd-cephmon is in sync, it is safe to run.
return 0
fi
sleep 1
done
# drbd-cephmon is not in sync, it is not safe to run
wlog "-" ERROR "drbd-cephmon is not in sync, cannot start ceph mon"
return 1
fi
# This is safe to run ceph mon
return 0
}
@ -260,44 +243,43 @@ start ()
local service="$1"
# For AIO-DX, the mon service has special treatment
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
# After the first controller unlock, ceph-mon is started by
# puppet-ceph module via sysvinit using /etc/init.d/ceph directly.
# Setting the controller-0 flag to the default prevents
# another controller from starting before any host-swact.
if [ ! -e "${CEPH_MON_LIB_PATH}"/.last_ceph_mon_active_controller_* ]; then
touch "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
fi
# For AIO-DX, ceph services have special treatment
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then
# NOTE: In case of uncontrolled swact, to force start ceph-mon service
# it will be needed to rename the flag to the desired controller.
# For ceph mon, check if drbd-cephmon is ready
if [ "${service}" == "mon" ]; then
can_start_ceph_mon
if [ $? -ne 0 ]; then
wlog "-" ERROR "Ceph mon cannot be started now."
wlog "-" INFO "Ceph monitor is not ready to start because drbd-cephmon is not ready and mounted"
exit 1
fi
fi
# Check mgmt network state
has_mgmt_network_carrier
if [ $? -ne 0 ]; then
# If this is a AIO-DX Direct, check if all other network interfaces are down
if [ "${system_mode}" == "duplex-direct" ]; then
has_all_network_no_carrier
if [ $? -eq 0 ]; then
wlog "-" INFO "All network interfaces are not functional, considering the other host is down. Let Ceph start."
else
# Else AIO-DX Direct mgmt network is NOT functional
wlog "-" INFO "Mgmt network is not functional, defer starting Ceph processes until recovered"
exit 1
fi
else
# Else AIO-DX mgmt network is NOT functional
wlog "-" INFO "Mgmt network is not functional, defer starting Ceph processes until recovered"
exit 1
fi
fi
fi
# Start the service
wlog "-" INFO "Ceph START ${service} command received"
with_service_lock "${service}" ${CEPH_SCRIPT} start ${service}
wlog "-" INFO "Ceph START ${service} command finished."
# For AIO-DX, the mon service has special treatment
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
# If ceph-mon is successfully running, clear old flags and set the new one
# RC global variable is set by the with_service_lock function trying to start ceph-mon
if [ ${RC} -eq 0 ]; then
# Remove old flags
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
rm -f "${CEPH_MON_SHUTDOWN_COMPLETE}"
# Create new flag
touch "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}"
fi
fi
}
stop ()
@ -307,10 +289,6 @@ stop ()
wlog "-" INFO "Ceph STOP $1 command received."
with_service_lock "$1" ${CEPH_SCRIPT} stop $1
wlog "-" INFO "Ceph STOP $1 command finished."
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
touch "${CEPH_MON_SHUTDOWN_COMPLETE}"
fi
}
restart ()
@ -394,6 +372,27 @@ status ()
fi
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
has_mgmt_network_carrier
if [ $? -eq 0 ]; then
# Network is functional, continue
wlog "-" INFO "mgmt network active..."
else
if [ "${system_mode}" == "duplex-direct" ]; then
has_all_network_no_carrier
if [ $? -ne 0 ]; then
# Network is NOT functional, prevent split brain corruptions
wlog "-" INFO "mgmt network inactive... stop OSDs to force a re-peering once the network has recovered"
stop "$1"
exit 0
fi
else
# Network is NOT functional, prevent split brain corruptions
wlog "-" INFO "mgmt network inactive... stop OSDs to force a re-peering once the network has recovered"
stop "$1"
exit 0
fi
fi
timeout $CEPH_STATUS_TIMEOUT ceph -s
if [ "$?" -ne 0 ]; then
# Ceph cluster is not accessible. Don't panic, controller swact
@ -482,6 +481,24 @@ status ()
test -e "/var/lib/ceph/mon/ceph-controller"
if [ "$?" -ne 0 ]; then
exit 3
else
has_mgmt_network_carrier
if [ $? -ne 0 ]; then
if [ "${system_mode}" == "duplex-direct" ]; then
has_all_network_no_carrier
if [ $? -ne 0 ]; then
# Network is NOT functional, prevent split brain corruptions
wlog "-" INFO "mgmt network inactive... stop MON to prevent localized operation"
stop "$1"
exit 0
fi
else
# Network is NOT functional, prevent split brain corruptions
wlog "-" INFO "mgmt network inactive... stop MON to prevent localized operation"
stop "$1"
exit 0
fi
fi
fi
fi
}