From 7811e90f4ea87db50343f7de7bf50807d189b9e9 Mon Sep 17 00:00:00 2001 From: Sergiy Markin Date: Fri, 6 Dec 2024 02:33:08 +0000 Subject: [PATCH] [ceph] Fix for ceph-osd pods restart This PS updates ceph-osd pod containers making sure that osd pods are not stuck at deletion. Also added similar approach to add lifecycle ondelete hook to kill log-runner container process before pod restart. And added wait_for_degraded_objects function to helm-test pod making sure that newly deployed pods have joined the ceph cluster and it is safe to go on with the next ceph-osd chart release upgrade. Change-Id: Ib31a5e1a82526906bff8c64ce1b199e3495b44b2 --- ceph-osd/Chart.yaml | 2 +- ceph-osd/templates/bin/_helm-tests.sh.tpl | 29 +++++++++++++++---- ceph-osd/templates/bin/_post-apply.sh.tpl | 23 ++++++++------- .../templates/bin/osd/_log-runner-stop.sh.tpl | 26 +++++++++++++++++ ceph-osd/templates/bin/osd/_log-tail.sh.tpl | 3 +- ceph-osd/templates/bin/osd/_stop.sh.tpl | 13 +++++---- ceph-osd/templates/configmap-bin.yaml | 2 ++ ceph-osd/templates/daemonset-osd.yaml | 4 +++ ceph-osd/templates/pod-helm-tests.yaml | 2 ++ releasenotes/notes/ceph-osd.yaml | 1 + 10 files changed, 82 insertions(+), 23 deletions(-) create mode 100644 ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index 353148876..85da89020 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.54 +version: 0.1.55 home: https://github.com/ceph/ceph ... diff --git a/ceph-osd/templates/bin/_helm-tests.sh.tpl b/ceph-osd/templates/bin/_helm-tests.sh.tpl index 6c47f8f78..cc21c9726 100644 --- a/ceph-osd/templates/bin/_helm-tests.sh.tpl +++ b/ceph-osd/templates/bin/_helm-tests.sh.tpl @@ -16,6 +16,17 @@ limitations under the License. 
set -ex +function wait_for_degraded_objects () { + echo "#### Start: Checking for degraded objects ####" + + # Loop until no degraded objects + while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded'`" ]] + do + sleep 30 + ceph -s + done +} + function check_osd_count() { echo "#### Start: Checking OSD count ####" noup_flag=$(ceph osd stat | awk '/noup/ {print $2}') @@ -38,20 +49,26 @@ function check_osd_count() { fi done echo "Caution: noup flag is set. ${count} OSDs in up/new state. Required number of OSDs: ${MIN_OSDS}." - if [ $MIN_OSDS -gt $count ]; then - exit 1 - fi + exit 0 else if [ "${num_osd}" -eq 0 ]; then echo "There are no osds in the cluster" - exit 1 elif [ "${num_in_osds}" -ge "${MIN_OSDS}" ] && [ "${num_up_osds}" -ge "${MIN_OSDS}" ]; then echo "Required number of OSDs (${MIN_OSDS}) are UP and IN status" + exit 0 else echo "Required number of OSDs (${MIN_OSDS}) are NOT UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}" - exit 1 fi fi } -check_osd_count +# in case the chart has been re-installed in order to make changes to daemonset +# we do not need rack_by_rack restarts +# but we need to wait until all re-installed ceph-osd pods are healthy +# and there are no degraded objects +while true; do + check_osd_count + sleep 10 +done +wait_for_degraded_objects +ceph -s diff --git a/ceph-osd/templates/bin/_post-apply.sh.tpl b/ceph-osd/templates/bin/_post-apply.sh.tpl index 42732612a..c2fe97a16 100644 --- a/ceph-osd/templates/bin/_post-apply.sh.tpl +++ b/ceph-osd/templates/bin/_post-apply.sh.tpl @@ -111,7 +111,7 @@ function wait_for_pgs () { else (( pgs_ready+=1 )) fi - sleep 3 + sleep 30 done } @@ -121,7 +121,7 @@ function wait_for_degraded_objects () { # Loop until no degraded objects while [[ ! 
-z "`ceph --cluster ${CLUSTER} -s | grep 'degraded'`" ]] do - sleep 3 + sleep 30 ceph -s done } @@ -132,7 +132,7 @@ function wait_for_degraded_and_misplaced_objects () { # Loop until no degraded or misplaced objects while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded\|misplaced'`" ]] do - sleep 3 + sleep 30 ceph -s done } @@ -148,14 +148,17 @@ function restart_by_rack() { echo "hosts count under $rack are: ${#hosts_in_rack[@]}" for host in ${hosts_in_rack[@]} do - echo "host is : $host" - if [[ ! -z "$host" ]]; then - pods_on_host=`kubectl get po -n $CEPH_NAMESPACE -l component=osd -o wide |grep $host|awk '{print $1}'` - echo "Restartig the pods under host $host" - kubectl delete po -n $CEPH_NAMESPACE $pods_on_host - fi + echo "host is : $host" + if [[ ! -z "$host" ]]; then + pods_on_host=$(kubectl get po -n "$CEPH_NAMESPACE" -l component=osd -o wide |grep "$host"|awk '{print $1}' | tr '\n' ' '|sed 's/ *$//g') + echo "Restarting the pods under host $host" + for pod in ${pods_on_host} + do + kubectl delete pod -n "$CEPH_NAMESPACE" "${pod}" || true + done + fi done - echo "waiting for the pods under rack $rack from restart" + echo "waiting for the pods under host $host from restart" # The pods will not be ready in first 60 seconds. Thus we can reduce # amount of queries to kubernetes. sleep 60 diff --git a/ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl b/ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl new file mode 100644 index 000000000..646a6bded --- /dev/null +++ b/ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl @@ -0,0 +1,26 @@ +#!/bin/bash + +{{/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +set -ex + +source /tmp/utils-resolveLocations.sh + +TAIL_PID="$(cat /tmp/ceph-log-runner.pid)" +while kill -0 ${TAIL_PID} >/dev/null 2>&1; +do + kill -9 ${TAIL_PID}; + sleep 1; +done diff --git a/ceph-osd/templates/bin/osd/_log-tail.sh.tpl b/ceph-osd/templates/bin/osd/_log-tail.sh.tpl index 301259142..f8c4c8e10 100644 --- a/ceph-osd/templates/bin/osd/_log-tail.sh.tpl +++ b/ceph-osd/templates/bin/osd/_log-tail.sh.tpl @@ -25,8 +25,9 @@ function tail_file () { while $keep_running; do tail --retry -f "${log_file}" & tail_pid=$! + echo $tail_pid > /tmp/ceph-log-runner.pid wait $tail_pid - sleep 1 + sleep 10 done } diff --git a/ceph-osd/templates/bin/osd/_stop.sh.tpl b/ceph-osd/templates/bin/osd/_stop.sh.tpl index 6309c1e17..fdb2dda00 100644 --- a/ceph-osd/templates/bin/osd/_stop.sh.tpl +++ b/ceph-osd/templates/bin/osd/_stop.sh.tpl @@ -18,15 +18,18 @@ set -ex source /tmp/utils-resolveLocations.sh +CEPH_OSD_PID="$(cat /run/ceph-osd.pid)" +while kill -0 ${CEPH_OSD_PID} >/dev/null 2>&1; do + kill -SIGTERM ${CEPH_OSD_PID} + sleep 1 +done + if [ "x${STORAGE_TYPE%-*}" == "xblock" ]; then OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION}) OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) if [ "x${STORAGE_TYPE#*-}" == "xlogical" ]; then - CEPH_OSD_PID="$(cat /run/ceph-osd.pid)" - while kill -0 ${CEPH_OSD_PID} >/dev/null 2>&1; do - kill -SIGTERM ${CEPH_OSD_PID} - sleep 1 - done umount "$(findmnt -S "${OSD_DEVICE}1" | tail -n +2 | awk '{ print $1 }')" fi fi + +fi diff --git a/ceph-osd/templates/configmap-bin.yaml b/ceph-osd/templates/configmap-bin.yaml index 7c2f2a680..adb6a0985 100644 
--- a/ceph-osd/templates/configmap-bin.yaml +++ b/ceph-osd/templates/configmap-bin.yaml @@ -56,6 +56,8 @@ data: {{ tuple "bin/osd/_check.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} osd-stop.sh: | {{ tuple "bin/osd/_stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + log-runner-stop.sh: | +{{ tuple "bin/osd/_log-runner-stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} init-dirs.sh: | {{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} helm-tests.sh: | diff --git a/ceph-osd/templates/daemonset-osd.yaml b/ceph-osd/templates/daemonset-osd.yaml index 3ba2ce7e9..41d6b7b07 100644 --- a/ceph-osd/templates/daemonset-osd.yaml +++ b/ceph-osd/templates/daemonset-osd.yaml @@ -327,6 +327,10 @@ spec: - name: pod-var-log mountPath: /var/log/ceph readOnly: false + - name: ceph-osd-bin + mountPath: /tmp/log-runner-stop.sh + subPath: log-runner-stop.sh + readOnly: true - name: ceph-osd-default {{ tuple $envAll "ceph_osd" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.osd | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} diff --git a/ceph-osd/templates/pod-helm-tests.yaml b/ceph-osd/templates/pod-helm-tests.yaml index 9ee685bcb..9a5c98b8c 100644 --- a/ceph-osd/templates/pod-helm-tests.yaml +++ b/ceph-osd/templates/pod-helm-tests.yaml @@ -41,6 +41,8 @@ spec: {{ tuple $envAll $envAll.Values.pod.resources.jobs.tests | include "helm-toolkit.snippets.kubernetes_resources" | indent 6 }} {{ dict "envAll" $envAll "application" "test" "container" "ceph_cluster_helm_test" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 6 }} env: + - name: CLUSTER + value: "ceph" - name: CEPH_DEPLOYMENT_NAMESPACE value: {{ .Release.Namespace }} - name: REQUIRED_PERCENT_OF_OSDS diff --git a/releasenotes/notes/ceph-osd.yaml b/releasenotes/notes/ceph-osd.yaml index 998ad87c9..ca681f9ea 100644 --- 
a/releasenotes/notes/ceph-osd.yaml +++ b/releasenotes/notes/ceph-osd.yaml @@ -55,4 +55,5 @@ ceph-osd: - 0.1.52 Use quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_focal by default - 0.1.53 Update ceph-daemon to be able to use tini init system - 0.1.54 Remove use of tini for ceph-daemon + - 0.1.55 Update ceph-osd pod containers to make sure OSD pods are properly terminated at restart ...