From f3c06824ca8979d654608698659b367c033c1b77 Mon Sep 17 00:00:00 2001 From: Gustavo Ornaghi Antunes Date: Wed, 23 Jul 2025 12:30:33 -0300 Subject: [PATCH] Improve rook-mon-exit service The rook-mon-exit service was causing uncontrolled swact in some scenarios, such as: BnR, upgrade/downgrade rook-ceph app, and apply action with swact during action. To fix it, the rook-mon-exit script was improved to guarantee the pod check will occur only when the rook-ceph-mon-float deployment exists and the replica is 1, avoiding the uncontrolled swact when the floating monitor was turned off for some action. Additionally, a new check to ensure the rook-ceph is running was implemented, checking the helm releases rook-ceph and rook-ceph-cluster are ready. Test Plan: - PASS: Apply the rook-ceph App - PASS: Swact among controllers and check if the floating monitor will be scheduled correctly in the active controller - PASS: Reboot the active controller and check if the floating monitor will be scheduled correctly in the new active controller - PASS: Reboot all controllers and check if the floating monitor will be scheduled correctly in the active controller - PASS: Turn off all controllers, turn on the controllers in reverse order and check if the floating monitor will be scheduled correctly in the active controller - PASS: Perform Backup and Restore, and check if uncontrolled swacts were not occurring - PASS: Upgrade app from an old app version to a new app version - PASS: Downgrade app from a new app version to an old app version - PASS: Apply app and cause a proposal uncontrolled swact during the apply app action Closes-Bug: 2115438 Change-Id: I9cc2e5d55f389b053a6a29fdbe3ce2337d8871da Signed-off-by: Gustavo Ornaghi Antunes --- stx-rook-ceph-helm/files/rook-mon-exit.sh | 160 +++++++++++++++------- 1 file changed, 109 insertions(+), 51 deletions(-) diff --git a/stx-rook-ceph-helm/files/rook-mon-exit.sh b/stx-rook-ceph-helm/files/rook-mon-exit.sh index 19d3cff..7bf1657 100644 --- 
a/stx-rook-ceph-helm/files/rook-mon-exit.sh +++ b/stx-rook-ceph-helm/files/rook-mon-exit.sh @@ -21,7 +21,7 @@ log () { local name="" local log_level="$1" # Checking if the first parameter is not a log level - if grep -q -v ${log_level} <<< "INFO DEBUG WARN ERROR"; then + if grep -q -v "${log_level}" <<< "INFO DEBUG WARN ERROR"; then name=" ($1)"; log_level="$2" shift @@ -41,7 +41,7 @@ log () { # Start Action ################################################################################ function start { - log INFO "Starting mon-float" + log INFO "Start: Starting mon-float" # Add label for pod scheduling # NOTE: Because SM and k8s can be restarted independently the k8s API may not @@ -49,10 +49,10 @@ function start { # applied in the status check kubectl --kubeconfig=/etc/kubernetes/admin.conf \ --request-timeout ${REQUEST_TIMEOUT} \ - label node $(hostname) \ + label node "$(hostname)" \ ceph-mon-float-placement=enabled - log INFO "Started mon-float" + log INFO "Start: Started mon-float" RETVAL=0 } @@ -60,35 +60,36 @@ function start { # Stop Action ################################################################################ function stop { - log INFO "Stopping mon-float" + log INFO "Stop: Stopping mon-float" # Add remove label to prevent pod scheduling # NOTE: Because SM and k8s can be restarted independently the k8s API may not # be available at the time of the start action. Don't fail. 
Confirm label is # applied in the status check kubectl --kubeconfig=/etc/kubernetes/admin.conf \ --request-timeout ${REQUEST_TIMEOUT} \ - label node $(hostname) \ + label node "$(hostname)" \ ceph-mon-float-placement- # Get floating monitor pod running on this node - POD=$(kubectl --kubeconfig=/etc/kubernetes/admin.conf \ - --request-timeout ${REQUEST_TIMEOUT} \ - get pod -n rook-ceph \ - -l app="rook-ceph-mon,mon=float" --no-headers=true \ - --field-selector=spec.nodeName=$(hostname) \ - -o=custom-columns=NAME:.metadata.name) + POD=$( + kubectl --kubeconfig=/etc/kubernetes/admin.conf \ + --request-timeout ${REQUEST_TIMEOUT} \ + get pod -n rook-ceph \ + -l "app=rook-ceph-mon,mon=float" --no-headers=true \ + --field-selector=spec.nodeName="$(hostname)" \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null + ) # Is there a floating monitor here? - if [ ! -z "${POD}" ]; then - log INFO "Deleting floating monitor pod" + if [ -n "${POD}" ]; then + log INFO "Stop: Deleting floating monitor pod" # delete detected pod to force a reschedule kubectl --kubeconfig=/etc/kubernetes/admin.conf \ --request-timeout ${REQUEST_TIMEOUT} \ - delete pod -n rook-ceph \ - ${POD} - log INFO "Deleted floating monitor pod" + delete pod -n rook-ceph "${POD}" + log INFO "Stop: Deleted floating monitor pod" fi - log INFO "Stopped floating monitor pod" + log INFO "Stop: Stopped floating monitor pod" RETVAL=0 } @@ -120,89 +121,130 @@ function status { # - do not have label and pod: RETVAL 1 # - FLOAT_IS_INSTALLED=$( + declare -A HR_STATUS + JSONPATH='{range .items[*]}' + JSONPATH+='{.metadata.name}{" "}' + JSONPATH+='{.status.conditions[?(@.type=="Ready")].status}{"\n"}' + JSONPATH+='{end}' + while read -r nome status; do + HR_STATUS["$nome"]="$status" + done < <( kubectl --kubeconfig=/etc/kubernetes/admin.conf \ - --request-timeout ${REQUEST_TIMEOUT} \ - get hr rook-ceph-floating-monitor -n rook-ceph \ - -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' + --request-timeout 
"${REQUEST_TIMEOUT}" \ + get hr -n rook-ceph \ + -o jsonpath="${JSONPATH}" ) - # If the floating is not installed, skip other checks. - if [[ "${FLOAT_IS_INSTALLED}" == "True" ]]; then + IS_ROOK_INSTALLED=$( + [[ "${HR_STATUS['rook-ceph-cluster']}" == "True" && + "${HR_STATUS['rook-ceph']}" == "True" ]] \ + && echo 1 || echo 0 + ) + IS_FLOAT_INSTALLED=$( + [[ "${HR_STATUS['rook-ceph-floating-monitor']}" == "True" ]] \ + && echo 1 || echo 0 + ) + IS_READY_TO_CHECK=$( + [[ "$IS_ROOK_INSTALLED" == "1" && "$IS_FLOAT_INSTALLED" == "1" ]] \ + && echo 1 || echo 0 + ) + + # If the floating is installed, get infos for next checks. + if (( IS_READY_TO_CHECK )); then # Is this host labeled for the floating monitor - NODE_LABELED=$( + IS_NODE_LABELED=$( kubectl --kubeconfig=/etc/kubernetes/admin.conf \ --request-timeout ${REQUEST_TIMEOUT} \ get nodes \ -l ceph-mon-float-placement --no-headers=true \ - --field-selector=metadata.name=$(hostname) \ - -o=custom-columns=NAME:.metadata.name + --field-selector=metadata.name="$(hostname)" \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null + ) + IS_NODE_LABELED=$( + [[ -n "${IS_NODE_LABELED}" ]] \ + && echo 1 || echo 0 ) - # Get floating monitor pod running on this node - NODE_WITH_FLOAT=$( + FLOATING_MON_REPLICAS=$( kubectl --kubeconfig=/etc/kubernetes/admin.conf \ --request-timeout ${REQUEST_TIMEOUT} \ - get pod -n rook-ceph \ - -l app="rook-ceph-mon,mon=float" \ - --no-headers=true \ - --field-selector=spec.nodeName=$(hostname) \ - -o=custom-columns=NAME:.metadata.name + get deployment -n rook-ceph \ + rook-ceph-mon-float --no-headers=true \ + -o=custom-columns=STATUS:.status.replicas \ + 2>/dev/null | sed 's/<none>/0/' ) - else - # Check floating pod status only when helm release is Unknown — - # during first app apply. Later, it returns True or False. 
- if [[ "${FLOAT_IS_INSTALLED}" == "Unknown" ]]; then - # Get floating monitor pod - FLOAT_POD_STATUS=$( + FLOATING_MON_REPLICAS=${FLOATING_MON_REPLICAS:-0} + + if (( FLOATING_MON_REPLICAS == 1 )); then + # Get floating monitor pod running on this node + HAS_FLOATING_POD=$( kubectl --kubeconfig=/etc/kubernetes/admin.conf \ --request-timeout ${REQUEST_TIMEOUT} \ get pod -n rook-ceph \ -l app="rook-ceph-mon,mon=float" \ --no-headers=true \ - -o=custom-columns=STATUS:.status.phase + --field-selector=spec.nodeName="$(hostname)" \ + -o name ) fi + HAS_FLOATING_POD=$( + [[ -n "${HAS_FLOATING_POD}" ]] \ + && echo 1 || echo 0 + ) + else + # Get floating monitor pod if the floating is not installed + FLOAT_POD_STATUS=$( + kubectl --kubeconfig=/etc/kubernetes/admin.conf \ + --request-timeout ${REQUEST_TIMEOUT} \ + get pod -n rook-ceph \ + -l app="rook-ceph-mon,mon=float" \ + --no-headers=true \ + -o=custom-columns=STATUS:.status.phase + ) fi + IS_EXPECTED_STATE=0 mountpoint -d ${DRBD_MOUNT} | grep -q ^${DRBD_MAJ_DEV_NUM} # It is the active controller if [ $? -eq 0 ]; then - # There is not mon-float. - if [[ "${FLOAT_IS_INSTALLED}" == "True" ]]; then - if [[ -n "${NODE_LABELED}" && -n "${NODE_WITH_FLOAT}" ]]; then + # Rook-ceph is installed and there is not mon-float. + if (( IS_READY_TO_CHECK )); then + if (( IS_NODE_LABELED && \ + ( HAS_FLOATING_POD || FLOATING_MON_REPLICAS == 0 ) )); then # If floating monitor is installed and service in the active # host is in expected state, return [0] to keep as is. RETVAL=0 else # If floating monitor is installed but active host lacks # label or pod, return [1] to SM trigger the start action. - log INFO "Status: Floating monitor is missing on this host." + LOG_MESSAGE="Floating monitor is missing on this host." RETVAL=1 fi elif [[ "${FLOAT_POD_STATUS}" == "Pending" ]]; then # If floating monitor is installing, return [1] to SM trigger # the start action. - log INFO "Status: Floating monitor is pending on this host." 
+ LOG_MESSAGE="Floating monitor is pending on this host." RETVAL=1 else # If floating monitor isn't running on active controller, # return [0] to prevent service failure and uncontrolled swact. RETVAL=0 fi + [ "${RETVAL}" -eq 0 ] && IS_EXPECTED_STATE=1 # It is the standby controller else - # There is mon-float. - if [[ "${FLOAT_IS_INSTALLED}" == "True" ]]; then - if [[ -z "${NODE_LABELED}" && -z "${NODE_WITH_FLOAT}" ]]; then + # Rook-ceph is installed and there is mon-float. + if (( IS_READY_TO_CHECK )); then + if (( ! IS_NODE_LABELED && \ + ( ! HAS_FLOATING_POD || FLOATING_MON_REPLICAS == 0 ) )); then # If floating monitor is installed and service in the standby # host is in expected state, return [1] to keep as is. RETVAL=1 else # If floating monitor is installed but standby host has # label or pod, return [1] to SM trigger the start action. - log INFO "Status: Floating monitor is present but not" \ - "allowed on this host." + LOG_MESSAGE="Floating monitor is present but not " + LOG_MESSAGE+="allowed on this host." RETVAL=0 fi else @@ -210,6 +252,22 @@ function status { # return [1] to prevent service failure and uncontrolled swact. RETVAL=1 fi + [ "${RETVAL}" -eq 1 ] && IS_EXPECTED_STATE=1 + fi + + if [ -n "${LOG_MESSAGE}" ]; then + log INFO "Status: ${LOG_MESSAGE}" + fi + + # Show floating monitor deployment log only if the rook-ceph is installed, + # floating monitor is installed, the expected state from this service is right + # but the floating monitor deployment replica is not the expected value [1] + if (( IS_READY_TO_CHECK && \ + IS_EXPECTED_STATE && \ + FLOATING_MON_REPLICAS == 0 )); then + log INFO \ + "Status: Floating monitor deployment has 0 replicas" \ + "or does not exist." fi }