[ceph] Fix for ceph-osd pods restart

This PS updates the ceph-osd pod containers to make sure
that OSD pods do not get stuck at deletion.

It adds the missing lifecycle preStop action for the log-runner container.

Change-Id: I8d6853a457d3142c33ca6b5449351d9b05ffacda
This commit is contained in:
Sergiy Markin 2024-12-11 17:59:53 +00:00
parent 7811e90f4e
commit c98ea9ca61
6 changed files with 20 additions and 4 deletions

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD
name: ceph-osd
version: 0.1.55
version: 0.1.56
home: https://github.com/ceph/ceph
...

View File

@ -49,12 +49,18 @@ function check_osd_count() {
fi
done
echo "Caution: noup flag is set. ${count} OSDs in up/new state. Required number of OSDs: ${MIN_OSDS}."
wait_for_degraded_objects
echo "There is no degraded objects found"
ceph -s
exit 0
else
if [ "${num_osd}" -eq 0 ]; then
echo "There are no osds in the cluster"
elif [ "${num_in_osds}" -ge "${MIN_OSDS}" ] && [ "${num_up_osds}" -ge "${MIN_OSDS}" ]; then
echo "Required number of OSDs (${MIN_OSDS}) are UP and IN status"
wait_for_degraded_objects
echo "There is no degraded objects found"
ceph -s
exit 0
else
echo "Required number of OSDs (${MIN_OSDS}) are NOT UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}"
@ -70,5 +76,4 @@ while true; do
check_osd_count
sleep 10
done
wait_for_degraded_objects
ceph -s

View File

@ -18,6 +18,8 @@ set -ex
source /tmp/utils-resolveLocations.sh
touch /tmp/ceph-log-runner.stop
TAIL_PID="$(cat /tmp/ceph-log-runner.pid)"
while kill -0 ${TAIL_PID} >/dev/null 2>&1;
do

View File

@ -27,7 +27,10 @@ function tail_file () {
tail_pid=$!
echo $tail_pid > /tmp/ceph-log-runner.pid
wait $tail_pid
sleep 10
if [ -f /tmp/ceph-log-runner.stop ]; then
keep_running=false
fi
sleep 30
done
}

View File

@ -317,6 +317,11 @@ spec:
value: {{ .Values.logging.osd_id.timeout | quote }}
command:
- /tmp/log-tail.sh
lifecycle:
preStop:
exec:
command:
- /tmp/log-runner-stop.sh
volumeMounts:
- name: pod-tmp
mountPath: /tmp

View File

@ -56,4 +56,5 @@ ceph-osd:
- 0.1.53 Update ceph-daemon to be able to use tini init system
- 0.1.54 Remove use of tini for ceph-daemon
- 0.1.55 Update ceph-osd pod containers to make sure OSD pods are properly terminated at restart
- 0.1.56 Add preStop lifecycle script to log-runner
...