[ceph] Fix for ceph-osd pods restart
This PS updates the ceph-osd pod containers to make sure that OSD pods do not get stuck at deletion. It adds the missing lifecycle preStop action for the log-runner container. Change-Id: I8d6853a457d3142c33ca6b5449351d9b05ffacda
This commit is contained in:
@@ -15,6 +15,6 @@ apiVersion: v1
|
|||||||
appVersion: v1.0.0
|
appVersion: v1.0.0
|
||||||
description: OpenStack-Helm Ceph OSD
|
description: OpenStack-Helm Ceph OSD
|
||||||
name: ceph-osd
|
name: ceph-osd
|
||||||
version: 0.1.55
|
version: 0.1.56
|
||||||
home: https://github.com/ceph/ceph
|
home: https://github.com/ceph/ceph
|
||||||
...
|
...
|
||||||
|
@@ -49,12 +49,18 @@ function check_osd_count() {
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
echo "Caution: noup flag is set. ${count} OSDs in up/new state. Required number of OSDs: ${MIN_OSDS}."
|
echo "Caution: noup flag is set. ${count} OSDs in up/new state. Required number of OSDs: ${MIN_OSDS}."
|
||||||
|
wait_for_degraded_objects
|
||||||
|
echo "There is no degraded objects found"
|
||||||
|
ceph -s
|
||||||
exit 0
|
exit 0
|
||||||
else
|
else
|
||||||
if [ "${num_osd}" -eq 0 ]; then
|
if [ "${num_osd}" -eq 0 ]; then
|
||||||
echo "There are no osds in the cluster"
|
echo "There are no osds in the cluster"
|
||||||
elif [ "${num_in_osds}" -ge "${MIN_OSDS}" ] && [ "${num_up_osds}" -ge "${MIN_OSDS}" ]; then
|
elif [ "${num_in_osds}" -ge "${MIN_OSDS}" ] && [ "${num_up_osds}" -ge "${MIN_OSDS}" ]; then
|
||||||
echo "Required number of OSDs (${MIN_OSDS}) are UP and IN status"
|
echo "Required number of OSDs (${MIN_OSDS}) are UP and IN status"
|
||||||
|
wait_for_degraded_objects
|
||||||
|
echo "There is no degraded objects found"
|
||||||
|
ceph -s
|
||||||
exit 0
|
exit 0
|
||||||
else
|
else
|
||||||
echo "Required number of OSDs (${MIN_OSDS}) are NOT UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}"
|
echo "Required number of OSDs (${MIN_OSDS}) are NOT UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}"
|
||||||
@@ -70,5 +76,4 @@ while true; do
|
|||||||
check_osd_count
|
check_osd_count
|
||||||
sleep 10
|
sleep 10
|
||||||
done
|
done
|
||||||
wait_for_degraded_objects
|
|
||||||
ceph -s
|
|
||||||
|
@@ -18,6 +18,8 @@ set -ex
|
|||||||
|
|
||||||
source /tmp/utils-resolveLocations.sh
|
source /tmp/utils-resolveLocations.sh
|
||||||
|
|
||||||
|
touch /tmp/ceph-log-runner.stop
|
||||||
|
|
||||||
TAIL_PID="$(cat /tmp/ceph-log-runner.pid)"
|
TAIL_PID="$(cat /tmp/ceph-log-runner.pid)"
|
||||||
while kill -0 ${TAIL_PID} >/dev/null 2>&1;
|
while kill -0 ${TAIL_PID} >/dev/null 2>&1;
|
||||||
do
|
do
|
||||||
|
@@ -27,7 +27,10 @@ function tail_file () {
|
|||||||
tail_pid=$!
|
tail_pid=$!
|
||||||
echo $tail_pid > /tmp/ceph-log-runner.pid
|
echo $tail_pid > /tmp/ceph-log-runner.pid
|
||||||
wait $tail_pid
|
wait $tail_pid
|
||||||
sleep 10
|
if [ -f /tmp/ceph-log-runner.stop ]; then
|
||||||
|
keep_running=false
|
||||||
|
fi
|
||||||
|
sleep 30
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -317,6 +317,11 @@ spec:
|
|||||||
value: {{ .Values.logging.osd_id.timeout | quote }}
|
value: {{ .Values.logging.osd_id.timeout | quote }}
|
||||||
command:
|
command:
|
||||||
- /tmp/log-tail.sh
|
- /tmp/log-tail.sh
|
||||||
|
lifecycle:
|
||||||
|
preStop:
|
||||||
|
exec:
|
||||||
|
command:
|
||||||
|
- /tmp/log-runner-stop.sh
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: pod-tmp
|
- name: pod-tmp
|
||||||
mountPath: /tmp
|
mountPath: /tmp
|
||||||
|
@@ -56,4 +56,5 @@ ceph-osd:
|
|||||||
- 0.1.53 Update ceph-daemon to be able to use tini init system
|
- 0.1.53 Update ceph-daemon to be able to use tini init system
|
||||||
- 0.1.54 Remove use of tini for ceph-daemon
|
- 0.1.54 Remove use of tini for ceph-daemon
|
||||||
- 0.1.55 Update ceph-osd pod containers to make sure OSD pods are properly terminated at restart
|
- 0.1.55 Update ceph-osd pod containers to make sure OSD pods are properly terminated at restart
|
||||||
|
- 0.1.56 Add preStop lifecycle script to log-runner
|
||||||
...
|
...
|
||||||
|
Reference in New Issue
Block a user