[ceph-osd] Allow for unconditional OSD restart

This change allows OSDs to be restarted unconditionally by the
ceph-osd chart. This can be useful in upgrade scenarios where
ceph-osd pods are unhealthy during the upgrade.

Change-Id: I6de98db2b4eb1d76411e1dbffa65c263de3aecee
This commit is contained in:
Stephen Taylor 2022-04-04 13:35:49 -06:00
parent 50063c809c
commit 76fb2562c6
5 changed files with 39 additions and 25 deletions

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0 appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD description: OpenStack-Helm Ceph OSD
name: ceph-osd name: ceph-osd
version: 0.1.38 version: 0.1.39
home: https://github.com/ceph/ceph home: https://github.com/ceph/ceph
... ...

View File

@ -188,8 +188,11 @@ done
echo "Latest revision of the helm chart(s) is : $max_release" echo "Latest revision of the helm chart(s) is : $max_release"
if [[ $max_release -gt 1 ]]; then # If flags are set that will prevent recovery, don't restart OSDs
if [[ $require_upgrade -gt 0 ]]; then ceph -s | grep "noup\|noin\|nobackfill\|norebalance\|norecover" > /dev/null
if [[ $? -ne 0 ]]; then
if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $max_release -gt 1 ]]; then
if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $require_upgrade -gt 0 ]]; then
if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then
echo "restarting all osds simultaneously" echo "restarting all osds simultaneously"
kubectl -n $CEPH_NAMESPACE delete pod -l component=osd kubectl -n $CEPH_NAMESPACE delete pod -l component=osd
@ -216,3 +219,6 @@ if [[ $max_release -gt 1 ]]; then
else else
echo "No revisions found for upgrade" echo "No revisions found for upgrade"
fi fi
else
echo "Skipping OSD restarts because flags are set that would prevent recovery"
fi

View File

@ -104,6 +104,8 @@ spec:
value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }} value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }}
- name: DISRUPTIVE_OSD_RESTART - name: DISRUPTIVE_OSD_RESTART
value: {{ .Values.conf.storage.disruptive_osd_restart | quote }} value: {{ .Values.conf.storage.disruptive_osd_restart | quote }}
- name: UNCONDITIONAL_OSD_RESTART
value: {{ .Values.conf.storage.unconditional_osd_restart | quote }}
command: command:
- /tmp/post-apply.sh - /tmp/post-apply.sh
volumeMounts: volumeMounts:

View File

@ -293,6 +293,11 @@ conf:
# OSD restarts more quickly with disruption. # OSD restarts more quickly with disruption.
disruptive_osd_restart: "false" disruptive_osd_restart: "false"
# The post-apply job will try to determine if OSDs need to be restarted and
# only restart them if necessary. Set this value to "true" to restart OSDs
# unconditionally.
unconditional_osd_restart: "false"
# NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define # NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define
# OSD pods that will be deployed upon specific nodes. # OSD pods that will be deployed upon specific nodes.
# overrides: # overrides:

View File

@ -39,4 +39,5 @@ ceph-osd:
- 0.1.36 Add OSD device location pre-check - 0.1.36 Add OSD device location pre-check
- 0.1.37 Add a disruptive OSD restart to the post-apply job - 0.1.37 Add a disruptive OSD restart to the post-apply job
- 0.1.38 Skip pod wait in post-apply job when disruptive - 0.1.38 Skip pod wait in post-apply job when disruptive
- 0.1.39 Allow for unconditional OSD restart
... ...