From 7811e90f4ea87db50343f7de7bf50807d189b9e9 Mon Sep 17 00:00:00 2001 From: Sergiy Markin Date: Fri, 6 Dec 2024 02:33:08 +0000 Subject: [PATCH] [ceph] Fix for ceph-osd pods restart This PS updates ceph-osd pod containers making sure that osd pods are not stuck at deletion. Also added similar approach to add lifecycle ondelete hook to kill log-runner container process before pod restart. And added wait_for_degraded_objects function to helm-test pod making sure that newly deployed pods have joined the ceph cluster and it is safe to go on with the next ceph-osd chart release upgrade. Change-Id: Ib31a5e1a82526906bff8c64ce1b199e3495b44b2 --- ceph-osd/Chart.yaml | 2 +- ceph-osd/templates/bin/_helm-tests.sh.tpl | 29 +++++++++++++++---- ceph-osd/templates/bin/_post-apply.sh.tpl | 23 ++++++++------- .../templates/bin/osd/_log-runner-stop.sh.tpl | 26 +++++++++++++++++ ceph-osd/templates/bin/osd/_log-tail.sh.tpl | 3 +- ceph-osd/templates/bin/osd/_stop.sh.tpl | 13 +++++---- ceph-osd/templates/configmap-bin.yaml | 2 ++ ceph-osd/templates/daemonset-osd.yaml | 4 +++ ceph-osd/templates/pod-helm-tests.yaml | 2 ++ releasenotes/notes/ceph-osd.yaml | 1 + 10 files changed, 82 insertions(+), 23 deletions(-) create mode 100644 ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index 353148876..85da89020 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.54 +version: 0.1.55 home: https://github.com/ceph/ceph ... diff --git a/ceph-osd/templates/bin/_helm-tests.sh.tpl b/ceph-osd/templates/bin/_helm-tests.sh.tpl index 6c47f8f78..cc21c9726 100644 --- a/ceph-osd/templates/bin/_helm-tests.sh.tpl +++ b/ceph-osd/templates/bin/_helm-tests.sh.tpl @@ -16,6 +16,17 @@ limitations under the License. 
set -ex +function wait_for_degraded_objects () { + echo "#### Start: Checking for degraded objects ####" + + # Loop until no degraded objects + while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded'`" ]] + do + sleep 30 + ceph -s + done +} + function check_osd_count() { echo "#### Start: Checking OSD count ####" noup_flag=$(ceph osd stat | awk '/noup/ {print $2}') @@ -38,20 +49,26 @@ function check_osd_count() { fi done echo "Caution: noup flag is set. ${count} OSDs in up/new state. Required number of OSDs: ${MIN_OSDS}." - if [ $MIN_OSDS -gt $count ]; then - exit 1 - fi + exit 0 else if [ "${num_osd}" -eq 0 ]; then echo "There are no osds in the cluster" - exit 1 elif [ "${num_in_osds}" -ge "${MIN_OSDS}" ] && [ "${num_up_osds}" -ge "${MIN_OSDS}" ]; then echo "Required number of OSDs (${MIN_OSDS}) are UP and IN status" + exit 0 else echo "Required number of OSDs (${MIN_OSDS}) are NOT UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}" - exit 1 fi fi } -check_osd_count +# in case the chart has been re-installed in order to make changes to daemonset +# we do not need rack_by_rack restarts +# but we need to wait until all re-installed ceph-osd pods are healthy +# and there are no degraded objects +while true; do + check_osd_count + sleep 10 +done +wait_for_degraded_objects +ceph -s diff --git a/ceph-osd/templates/bin/_post-apply.sh.tpl b/ceph-osd/templates/bin/_post-apply.sh.tpl index 42732612a..c2fe97a16 100644 --- a/ceph-osd/templates/bin/_post-apply.sh.tpl +++ b/ceph-osd/templates/bin/_post-apply.sh.tpl @@ -111,7 +111,7 @@ function wait_for_pgs () { else (( pgs_ready+=1 )) fi - sleep 3 + sleep 30 done } @@ -121,7 +121,7 @@ function wait_for_degraded_objects () { # Loop until no degraded objects while [[ ! 
-z "`ceph --cluster ${CLUSTER} -s | grep 'degraded'`" ]] do - sleep 3 + sleep 30 ceph -s done } @@ -132,7 +132,7 @@ function wait_for_degraded_and_misplaced_objects () { # Loop until no degraded or misplaced objects while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded\|misplaced'`" ]] do - sleep 3 + sleep 30 ceph -s done } @@ -148,14 +148,17 @@ function restart_by_rack() { echo "hosts count under $rack are: ${#hosts_in_rack[@]}" for host in ${hosts_in_rack[@]} do - echo "host is : $host" - if [[ ! -z "$host" ]]; then - pods_on_host=`kubectl get po -n $CEPH_NAMESPACE -l component=osd -o wide |grep $host|awk '{print $1}'` - echo "Restartig the pods under host $host" - kubectl delete po -n $CEPH_NAMESPACE $pods_on_host - fi + echo "host is : $host" + if [[ ! -z "$host" ]]; then + pods_on_host=$(kubectl get po -n "$CEPH_NAMESPACE" -l component=osd -o wide |grep "$host"|awk '{print $1}' | tr '\n' ' '|sed 's/ *$//g') + echo "Restarting the pods under host $host" + for pod in ${pods_on_host} + do + kubectl delete pod -n "$CEPH_NAMESPACE" "${pod}" || true + done + fi done - echo "waiting for the pods under rack $rack from restart" + echo "waiting for the pods under host $host from restart" # The pods will not be ready in first 60 seconds. Thus we can reduce # amount of queries to kubernetes. sleep 60 diff --git a/ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl b/ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl new file mode 100644 index 000000000..646a6bded --- /dev/null +++ b/ceph-osd/templates/bin/osd/_log-runner-stop.sh.tpl @@ -0,0 +1,26 @@ +#!/bin/bash + +{{/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +set -ex + +source /tmp/utils-resolveLocations.sh + +TAIL_PID="$(cat /tmp/ceph-log-runner.pid)" +while kill -0 ${TAIL_PID} >/dev/null 2>&1; +do + kill -9 ${TAIL_PID}; + sleep 1; +done diff --git a/ceph-osd/templates/bin/osd/_log-tail.sh.tpl b/ceph-osd/templates/bin/osd/_log-tail.sh.tpl index 301259142..f8c4c8e10 100644 --- a/ceph-osd/templates/bin/osd/_log-tail.sh.tpl +++ b/ceph-osd/templates/bin/osd/_log-tail.sh.tpl @@ -25,8 +25,9 @@ function tail_file () { while $keep_running; do tail --retry -f "${log_file}" & tail_pid=$! + echo $tail_pid > /tmp/ceph-log-runner.pid wait $tail_pid - sleep 1 + sleep 10 done } diff --git a/ceph-osd/templates/bin/osd/_stop.sh.tpl b/ceph-osd/templates/bin/osd/_stop.sh.tpl index 6309c1e17..fdb2dda00 100644 --- a/ceph-osd/templates/bin/osd/_stop.sh.tpl +++ b/ceph-osd/templates/bin/osd/_stop.sh.tpl @@ -18,15 +18,18 @@ set -ex source /tmp/utils-resolveLocations.sh +CEPH_OSD_PID="$(cat /run/ceph-osd.pid)" +while kill -0 ${CEPH_OSD_PID} >/dev/null 2>&1; do + kill -SIGTERM ${CEPH_OSD_PID} + sleep 1 +done + if [ "x${STORAGE_TYPE%-*}" == "xblock" ]; then OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION}) OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) if [ "x${STORAGE_TYPE#*-}" == "xlogical" ]; then - CEPH_OSD_PID="$(cat /run/ceph-osd.pid)" - while kill -0 ${CEPH_OSD_PID} >/dev/null 2>&1; do - kill -SIGTERM ${CEPH_OSD_PID} - sleep 1 - done umount "$(findmnt -S "${OSD_DEVICE}1" | tail -n +2 | awk '{ print $1 }')" fi fi + +fi diff --git a/ceph-osd/templates/configmap-bin.yaml b/ceph-osd/templates/configmap-bin.yaml index 7c2f2a680..adb6a0985 100644 
--- a/ceph-osd/templates/configmap-bin.yaml +++ b/ceph-osd/templates/configmap-bin.yaml @@ -56,6 +56,8 @@ data: {{ tuple "bin/osd/_check.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} osd-stop.sh: | {{ tuple "bin/osd/_stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + log-runner-stop.sh: | +{{ tuple "bin/osd/_log-runner-stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} init-dirs.sh: | {{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} helm-tests.sh: | diff --git a/ceph-osd/templates/daemonset-osd.yaml b/ceph-osd/templates/daemonset-osd.yaml index 3ba2ce7e9..41d6b7b07 100644 --- a/ceph-osd/templates/daemonset-osd.yaml +++ b/ceph-osd/templates/daemonset-osd.yaml @@ -327,6 +327,10 @@ spec: - name: pod-var-log mountPath: /var/log/ceph readOnly: false + - name: ceph-osd-bin + mountPath: /tmp/log-runner-stop.sh + subPath: log-runner-stop.sh + readOnly: true - name: ceph-osd-default {{ tuple $envAll "ceph_osd" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.osd | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} diff --git a/ceph-osd/templates/pod-helm-tests.yaml b/ceph-osd/templates/pod-helm-tests.yaml index 9ee685bcb..9a5c98b8c 100644 --- a/ceph-osd/templates/pod-helm-tests.yaml +++ b/ceph-osd/templates/pod-helm-tests.yaml @@ -41,6 +41,8 @@ spec: {{ tuple $envAll $envAll.Values.pod.resources.jobs.tests | include "helm-toolkit.snippets.kubernetes_resources" | indent 6 }} {{ dict "envAll" $envAll "application" "test" "container" "ceph_cluster_helm_test" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 6 }} env: + - name: CLUSTER + value: "ceph" - name: CEPH_DEPLOYMENT_NAMESPACE value: {{ .Release.Namespace }} - name: REQUIRED_PERCENT_OF_OSDS diff --git a/releasenotes/notes/ceph-osd.yaml b/releasenotes/notes/ceph-osd.yaml index 998ad87c9..ca681f9ea 100644 --- 
a/releasenotes/notes/ceph-osd.yaml +++ b/releasenotes/notes/ceph-osd.yaml @@ -55,4 +55,5 @@ ceph-osd: - 0.1.52 Use quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_focal by default - 0.1.53 Update ceph-daemon to be able to use tini init system - 0.1.54 Remove use of tini for ceph-daemon + - 0.1.55 Update ceph-osd pod containers to make sure OSD pods are properly terminated at restart ...