diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index e50427f68..be0c75bc7 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.38 +version: 0.1.39 home: https://github.com/ceph/ceph ... diff --git a/ceph-osd/templates/bin/_post-apply.sh.tpl b/ceph-osd/templates/bin/_post-apply.sh.tpl index c8a50202d..74229676c 100644 --- a/ceph-osd/templates/bin/_post-apply.sh.tpl +++ b/ceph-osd/templates/bin/_post-apply.sh.tpl @@ -188,31 +188,37 @@ done echo "Latest revision of the helm chart(s) is : $max_release" -if [[ $max_release -gt 1 ]]; then - if [[ $require_upgrade -gt 0 ]]; then - if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then - echo "restarting all osds simultaneously" - kubectl -n $CEPH_NAMESPACE delete pod -l component=osd - sleep 60 - echo "waiting for pgs to become active and for degraded objects to recover" - wait_for_pgs - wait_for_degraded_objects - ceph -s - else - echo "waiting for inactive pgs and degraded objects before upgrade" - wait_for_pgs - wait_for_degraded_and_misplaced_objects - ceph -s - ceph osd "set" noout - echo "lets restart the osds rack by rack" - restart_by_rack - ceph osd "unset" noout +# If flags are set that will prevent recovery, don't restart OSDs +ceph -s | grep "noup\|noin\|nobackfill\|norebalance\|norecover" > /dev/null +if [[ $? -ne 0 ]]; then + if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $max_release -gt 1 ]]; then + if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $require_upgrade -gt 0 ]]; then + if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then + echo "restarting all osds simultaneously" + kubectl -n $CEPH_NAMESPACE delete pod -l component=osd + sleep 60 + echo "waiting for pgs to become active and for degraded objects to recover" + wait_for_pgs + wait_for_degraded_objects + ceph -s + else + echo "waiting for inactive pgs and degraded objects before upgrade" + wait_for_pgs + wait_for_degraded_and_misplaced_objects + ceph -s + ceph osd "set" noout + echo "lets restart the osds rack by rack" + restart_by_rack + ceph osd "unset" noout + fi fi - fi - #lets check all the ceph-osd daemonsets - echo "checking DS" - check_ds + #lets check all the ceph-osd daemonsets + echo "checking DS" + check_ds + else + echo "No revisions found for upgrade" + fi else - echo "No revisions found for upgrade" + echo "Skipping OSD restarts because flags are set that would prevent recovery" fi diff --git a/ceph-osd/templates/job-post-apply.yaml b/ceph-osd/templates/job-post-apply.yaml index 6e9a34707..393769d95 100644 --- a/ceph-osd/templates/job-post-apply.yaml +++ b/ceph-osd/templates/job-post-apply.yaml @@ -104,6 +104,8 @@ spec: value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }} - name: DISRUPTIVE_OSD_RESTART value: {{ .Values.conf.storage.disruptive_osd_restart | quote }} + - name: UNCONDITIONAL_OSD_RESTART + value: {{ .Values.conf.storage.unconditional_osd_restart | quote }} command: - /tmp/post-apply.sh volumeMounts: diff --git a/ceph-osd/values.yaml b/ceph-osd/values.yaml index 09c41e985..ad87e2a15 100644 --- a/ceph-osd/values.yaml +++ b/ceph-osd/values.yaml @@ -293,6 +293,11 @@ conf: # OSD restarts more quickly with disruption. disruptive_osd_restart: "false" + # The post-apply job will try to determine if OSDs need to be restarted and + # only restart them if necessary. Set this value to "true" to restart OSDs + # unconditionally. + unconditional_osd_restart: "false" + # NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define # OSD pods that will be deployed upon specifc nodes. # overrides: diff --git a/releasenotes/notes/ceph-osd.yaml b/releasenotes/notes/ceph-osd.yaml index dd319eafb..a66f6e597 100644 --- a/releasenotes/notes/ceph-osd.yaml +++ b/releasenotes/notes/ceph-osd.yaml @@ -39,4 +39,5 @@ ceph-osd: - 0.1.36 Add OSD device location pre-check - 0.1.37 Add a disruptive OSD restart to the post-apply job - 0.1.38 Skip pod wait in post-apply job when disruptive + - 0.1.39 Allow for unconditional OSD restart ...