[ceph] Fix for ceph-osd pods restart

This PS updates the ceph-osd pod containers to make sure
that OSD pods do not get stuck at deletion. Using the same
approach, it also adds a lifecycle on-delete hook that kills
the log-runner container process before the pod restarts.

It also adds a wait_for_degraded_objects function to the
helm-test pod to make sure that newly deployed OSD pods have
joined the Ceph cluster and it is safe to proceed with the
next ceph-osd chart release upgrade.

Change-Id: Ib31a5e1a82526906bff8c64ce1b199e3495b44b2
Sergiy Markin 2024-12-06 02:33:08 +00:00
parent cd1ee943f9
commit 7811e90f4e
10 changed files with 82 additions and 23 deletions
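
For context, the helm-test change in this commit boils down to the outline below. This is a simplified sketch, not the literal script; the actual loop and exit handling are in the helm-test script hunk further down.

# Simplified outline of the added helm-test gate (sketch only)
check_osd_count              # confirm the required number of OSDs are up and in
wait_for_degraded_objects    # block while "ceph -s" still reports degraded objects
ceph -s                      # print the final cluster status for the test log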


@@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD
name: ceph-osd
version: 0.1.54
version: 0.1.55
home: https://github.com/ceph/ceph
...


@@ -16,6 +16,17 @@ limitations under the License.
set -ex
function wait_for_degraded_objects () {
  echo "#### Start: Checking for degraded objects ####"
  # Loop until no degraded objects
  while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded'`" ]]
  do
    sleep 30
    ceph -s
  done
}

function check_osd_count() {
  echo "#### Start: Checking OSD count ####"
  noup_flag=$(ceph osd stat | awk '/noup/ {print $2}')
@@ -38,20 +49,26 @@ function check_osd_count() {
fi
done
echo "Caution: noup flag is set. ${count} OSDs in up/new state. Required number of OSDs: ${MIN_OSDS}."
if [ $MIN_OSDS -gt $count ]; then
exit 1
fi
exit 0
else
if [ "${num_osd}" -eq 0 ]; then
echo "There are no osds in the cluster"
exit 1
elif [ "${num_in_osds}" -ge "${MIN_OSDS}" ] && [ "${num_up_osds}" -ge "${MIN_OSDS}" ]; then
echo "Required number of OSDs (${MIN_OSDS}) are UP and IN status"
exit 0
else
echo "Required number of OSDs (${MIN_OSDS}) are NOT UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}"
exit 1
fi
fi
}
check_osd_count
# In case the chart has been re-installed in order to make changes to the daemonset,
# we do not need rack-by-rack restarts, but we do need to wait until all re-installed
# ceph-osd pods are healthy and there are no degraded objects.
while true; do
check_osd_count
sleep 10
done
wait_for_degraded_objects
ceph -s
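
Assuming the chart is installed as a Helm release, the updated gate can also be exercised by hand after an upgrade. The release name and namespace below are assumptions for illustration, not part of this change:

# Run the chart's helm tests and allow time for the OSD and degraded-object checks
helm test ceph-osd --namespace ceph --timeout 900s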


@@ -111,7 +111,7 @@ function wait_for_pgs () {
else
(( pgs_ready+=1 ))
fi
sleep 3
sleep 30
done
}
@@ -121,7 +121,7 @@ function wait_for_degraded_objects () {
# Loop until no degraded objects
while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded'`" ]]
do
sleep 3
sleep 30
ceph -s
done
}
@@ -132,7 +132,7 @@ function wait_for_degraded_and_misplaced_objects () {
# Loop until no degraded or misplaced objects
while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded\|misplaced'`" ]]
do
sleep 3
sleep 30
ceph -s
done
}
@@ -148,14 +148,17 @@ function restart_by_rack() {
echo "hosts count under $rack are: ${#hosts_in_rack[@]}"
for host in ${hosts_in_rack[@]}
do
echo "host is : $host"
if [[ ! -z "$host" ]]; then
pods_on_host=`kubectl get po -n $CEPH_NAMESPACE -l component=osd -o wide |grep $host|awk '{print $1}'`
echo "Restartig the pods under host $host"
kubectl delete po -n $CEPH_NAMESPACE $pods_on_host
fi
echo "host is : $host"
if [[ ! -z "$host" ]]; then
pods_on_host=$(kubectl get po -n "$CEPH_NAMESPACE" -l component=osd -o wide |grep "$host"|awk '{print $1}' | tr '\n' ' '|sed 's/ *$//g')
echo "Restarting the pods under host $host"
for pod in ${pods_on_host}
do
kubectl delete pod -n "$CEPH_NAMESPACE" "${pod}" || true
done
fi
done
echo "waiting for the pods under rack $rack from restart"
echo "waiting for the pods under host $host from restart"
# The pods will not be ready in first 60 seconds. Thus we can reduce
# amount of queries to kubernetes.
sleep 60


@@ -0,0 +1,26 @@
#!/bin/bash
{{/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -ex
source /tmp/utils-resolveLocations.sh
TAIL_PID="$(cat /tmp/ceph-log-runner.pid)"
while kill -0 ${TAIL_PID} >/dev/null 2>&1;
do
  kill -9 ${TAIL_PID};
  sleep 1;
done
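
This script reads the PID file written by the log-runner start script (next hunk) and keeps sending SIGKILL until the tail process is gone. A quick manual sanity check that OSD pods now terminate promptly could look like the following; the pod name and namespace are hypothetical:

# Delete one OSD pod and watch it terminate; with the stop hooks in place it should
# not hang for the full termination grace period
kubectl -n ceph delete pod ceph-osd-default-abcde
kubectl -n ceph get pods -l component=osd -w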


@@ -25,8 +25,9 @@ function tail_file () {
  while $keep_running; do
    tail --retry -f "${log_file}" &
    tail_pid=$!
    echo $tail_pid > /tmp/ceph-log-runner.pid
    wait $tail_pid
    sleep 1
    sleep 10
  done
}


@@ -18,15 +18,18 @@ set -ex
source /tmp/utils-resolveLocations.sh
CEPH_OSD_PID="$(cat /run/ceph-osd.pid)"
while kill -0 ${CEPH_OSD_PID} >/dev/null 2>&1; do
kill -SIGTERM ${CEPH_OSD_PID}
sleep 1
done
if [ "x${STORAGE_TYPE%-*}" == "xblock" ]; then
OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
if [ "x${STORAGE_TYPE#*-}" == "xlogical" ]; then
CEPH_OSD_PID="$(cat /run/ceph-osd.pid)"
while kill -0 ${CEPH_OSD_PID} >/dev/null 2>&1; do
kill -SIGTERM ${CEPH_OSD_PID}
sleep 1
done
umount "$(findmnt -S "${OSD_DEVICE}1" | tail -n +2 | awk '{ print $1 }')"
fi
fi
fi


@@ -56,6 +56,8 @@ data:
{{ tuple "bin/osd/_check.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-stop.sh: |
{{ tuple "bin/osd/_stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
log-runner-stop.sh: |
{{ tuple "bin/osd/_log-runner-stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
init-dirs.sh: |
{{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
helm-tests.sh: |


@@ -327,6 +327,10 @@ spec:
- name: pod-var-log
mountPath: /var/log/ceph
readOnly: false
- name: ceph-osd-bin
mountPath: /tmp/log-runner-stop.sh
subPath: log-runner-stop.sh
readOnly: true
- name: ceph-osd-default
{{ tuple $envAll "ceph_osd" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.osd | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
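
The volumeMount above only makes the stop script available inside the container; the lifecycle hook that invokes /tmp/log-runner-stop.sh (described in the commit message) is not visible in this excerpt. One way to confirm it appears in the rendered manifest, assuming a local chart checkout and these example names:

# Render the chart locally and check where log-runner-stop.sh is mounted and referenced
helm template ceph-osd ./ceph-osd --namespace ceph | grep -n -B2 -A6 'log-runner-stop.sh'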


@@ -41,6 +41,8 @@ spec:
{{ tuple $envAll $envAll.Values.pod.resources.jobs.tests | include "helm-toolkit.snippets.kubernetes_resources" | indent 6 }}
{{ dict "envAll" $envAll "application" "test" "container" "ceph_cluster_helm_test" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 6 }}
env:
- name: CLUSTER
value: "ceph"
- name: CEPH_DEPLOYMENT_NAMESPACE
value: {{ .Release.Namespace }}
- name: REQUIRED_PERCENT_OF_OSDS


@@ -55,4 +55,5 @@ ceph-osd:
- 0.1.52 Use quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_focal by default
- 0.1.53 Update ceph-daemon to be able to use tini init system
- 0.1.54 Remove use of tini for ceph-daemon
- 0.1.55 Update ceph-osd pod containers to make sure OSD pods are properly terminated at restart
...