From ae17a61836c8d4d0b7e12bdf1b433b78c9afe24c Mon Sep 17 00:00:00 2001
From: Stephen Taylor
Date: Mon, 7 Feb 2022 14:03:01 -0700
Subject: [PATCH] [ceph-mon] Add a post-apply job to restart mons after mgrs

If the OnDelete pod restart strategy is used for the ceph-mon
daemonset, run a post-apply job to restart the ceph-mon pods one
at a time. Otherwise the mons could restart before the mgrs,
which can be problematic in some upgrade scenarios.

Change-Id: I57f87130e95088217c3cfe73512caaae41d3ef22
---
 ceph-mon/Chart.yaml                       |   2 +-
 ceph-mon/templates/bin/_post-apply.sh.tpl | 147 ++++++++++++++++++++++
 ceph-mon/templates/configmap-bin.yaml     |   2 +
 ceph-mon/templates/job-post-apply.yaml    | 143 ++++++++++++++++++++++
 ceph-mon/values.yaml                      |   8 ++
 ceph-mon/values_overrides/apparmor.yaml   |   2 +
 releasenotes/notes/ceph-mon.yaml          |   1 +
 7 files changed, 304 insertions(+), 1 deletion(-)
 create mode 100644 ceph-mon/templates/bin/_post-apply.sh.tpl
 create mode 100644 ceph-mon/templates/job-post-apply.yaml

diff --git a/ceph-mon/Chart.yaml b/ceph-mon/Chart.yaml
index 85fbc9d7c..c4ccaa836 100644
--- a/ceph-mon/Chart.yaml
+++ b/ceph-mon/Chart.yaml
@@ -15,6 +15,6 @@ apiVersion: v1
 appVersion: v1.0.0
 description: OpenStack-Helm Ceph Mon
 name: ceph-mon
-version: 0.1.18
+version: 0.1.19
 home: https://github.com/ceph/ceph
 ...
diff --git a/ceph-mon/templates/bin/_post-apply.sh.tpl b/ceph-mon/templates/bin/_post-apply.sh.tpl
new file mode 100644
index 000000000..93412ed4c
--- /dev/null
+++ b/ceph-mon/templates/bin/_post-apply.sh.tpl
@@ -0,0 +1,147 @@
+#!/bin/bash
+
+{{/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+export LC_ALL=C
+
+: "${ADMIN_KEYRING:=/etc/ceph/${CLUSTER}.client.admin.keyring}"
+
+if [[ ! -f /etc/ceph/${CLUSTER}.conf ]]; then
+  echo "ERROR- /etc/ceph/${CLUSTER}.conf must exist; get it from your existing mon"
+  exit 1
+fi
+
+if [[ ! -f ${ADMIN_KEYRING} ]]; then
+  echo "ERROR- ${ADMIN_KEYRING} must exist; get it from your existing mon"
+  exit 1
+fi
+
+ceph --cluster ${CLUSTER} -s
+
+# Poll every 5 seconds until no "ceph-mon" container in the namespace is
+# reported as not ready.
+#   $1 - namespace holding the mon pods
+#   $2 - timeout in seconds (default 1800); the script exits 1 when it expires
+function wait_for_pods() {
+  timeout=${2:-1800}
+  end=$(date -ud "${timeout} seconds" +%s)
+  # Selecting containers with "ceph-mon" name and
+  # counting them based on "ready" field.
+  count_pods=".items | map(.status.containerStatuses | .[] | \
+              select(.name==\"ceph-mon\")) | \
+              group_by(.ready) | map({(.[0].ready | tostring): length}) | .[]"
+  min_mons="add | if .true >= (.false + .true) \
+            then \"pass\" else \"fail\" end"
+  while true; do
+      # Leave while loop if all mons are ready.
+      state=$(kubectl get pods --namespace="${1}" -l component=mon -o json | jq "${count_pods}")
+      mon_state=$(jq -s "${min_mons}" <<< "${state}")
+      if [[ "${mon_state}" == \"pass\" ]]; then
+        break
+      fi
+      sleep 5
+
+      if [ $(date -u +%s) -gt $end ] ; then
+          echo -e "Containers failed to start after $timeout seconds\n"
+          kubectl get pods --namespace "${1}" -o wide -l component=mon
+          exit 1
+      fi
+  done
+}
+
+# Verify that each mon daemonset in $CEPH_NAMESPACE reports identical values
+# for its current, desired, available, ready and updated pod counters.
+# Exits the script with status 1 at the first daemonset where they differ.
+function check_ds() {
+  for ds in `kubectl get ds --namespace=$CEPH_NAMESPACE -l component=mon --no-headers=true|awk '{print $1}'`
+  do
+    ds_query=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status`
+    if echo $ds_query |grep -i "numberAvailable" ;then
+      currentNumberScheduled=`echo $ds_query|jq -r .currentNumberScheduled`
+      desiredNumberScheduled=`echo $ds_query|jq -r .desiredNumberScheduled`
+      numberAvailable=`echo $ds_query|jq -r .numberAvailable`
+      numberReady=`echo $ds_query|jq -r .numberReady`
+      updatedNumberScheduled=`echo $ds_query|jq -r .updatedNumberScheduled`
+      ds_check=`echo "$currentNumberScheduled $desiredNumberScheduled $numberAvailable $numberReady $updatedNumberScheduled"| \
+        tr ' ' '\n'|sort -u|wc -l`
+      if [ $ds_check != 1 ]; then
+        echo "Some pods in daemonset $ds are not ready"
+        # A bare "exit" would return the status of the echo above (0).
+        exit 1
+      else
+        echo "All pods in deamonset $ds are ready"
+      fi
+    else
+      echo "There are no mons under daemonset $ds"
+    fi
+  done
+}
+
+# Delete the mon pods in $CEPH_NAMESPACE one at a time, waiting after each
+# deletion until wait_for_pods passes before moving on to the next pod.
+function restart_mons() {
+  mon_pods=`kubectl get po -n $CEPH_NAMESPACE -l component=mon --no-headers | awk '{print $1}'`
+
+  for pod in ${mon_pods}
+  do
+    if [[ -n "$pod" ]]; then
+      echo "Restarting pod $pod"
+      kubectl delete pod -n $CEPH_NAMESPACE $pod
+    fi
+    echo "Waiting for the pod $pod to restart"
+    # The pod will not be ready in first 60 seconds. Thus we can reduce
+    # amount of queries to kubernetes.
+    sleep 60
+    # wait_for_pods takes the namespace as its first argument; omitting it
+    # makes kubectl run with an empty --namespace value.
+    wait_for_pods "${CEPH_NAMESPACE}"
+    ceph -s
+  done
+}
+
+wait_for_pods $CEPH_NAMESPACE
+
+require_upgrade=0
+max_release=0
+
+# Count the mon daemonsets whose updatedNumberScheduled differs from
+# desiredNumberScheduled and track the highest observedGeneration among them.
+for ds in `kubectl get ds --namespace=$CEPH_NAMESPACE -l component=mon --no-headers=true|awk '{print $1}'`
+do
+  updatedNumberScheduled=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status.updatedNumberScheduled`
+  desiredNumberScheduled=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status.desiredNumberScheduled`
+  if [[ $updatedNumberScheduled != $desiredNumberScheduled ]]; then
+    if kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status|grep -i "numberAvailable" ;then
+      require_upgrade=$((require_upgrade+1))
+      _release=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status.observedGeneration`
+      max_release=$(( max_release > _release ? max_release : _release ))
+    fi
+  fi
+done
+
+echo "Latest revision of the helm chart(s) is : $max_release"
+
+if [[ $max_release -gt 1 ]]; then
+  if [[ $require_upgrade -gt 0 ]]; then
+    echo "Restart ceph-mon pods one at a time to prevent disruption"
+    restart_mons
+  fi
+
+  # Check all the ceph-mon daemonsets
+  echo "checking DS"
+  check_ds
+else
+  echo "No revisions found for upgrade"
+fi
diff --git a/ceph-mon/templates/configmap-bin.yaml b/ceph-mon/templates/configmap-bin.yaml
index 438b1fe64..59cadc10e 100644
--- a/ceph-mon/templates/configmap-bin.yaml
+++ b/ceph-mon/templates/configmap-bin.yaml
@@ -29,6 +29,8 @@ data:
   bootstrap.sh: |
 {{ tuple "bin/_bootstrap.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
 {{- end }}
+  post-apply.sh: |
+{{ tuple "bin/_post-apply.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
   init-dirs.sh: |
 {{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
 
diff --git a/ceph-mon/templates/job-post-apply.yaml b/ceph-mon/templates/job-post-apply.yaml
new file mode 100644
index 000000000..01a1b1f7f
--- /dev/null
+++ b/ceph-mon/templates/job-post-apply.yaml
@@ -0,0 +1,143 @@
+{{/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+{{- if eq .Values.pod.lifecycle.upgrades.daemonsets.pod_replacement_strategy "OnDelete" }}
+{{- if and .Values.manifests.job_post_apply }}
+{{- $envAll := . }}
+
+{{- $serviceAccountName := printf "%s-%s" .Release.Name "post-apply" }}
+{{ tuple $envAll "post-apply" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ $serviceAccountName }}
+rules:
+  - apiGroups:
+      - ''
+    resources:
+      - pods
+      - events
+      - jobs
+      - pods/exec
+    verbs:
+      - create
+      - get
+      - delete
+      - list
+  - apiGroups:
+      - 'apps'
+    resources:
+      - daemonsets
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - 'batch'
+    resources:
+      - jobs
+    verbs:
+      - get
+      - list
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ $serviceAccountName }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ $serviceAccountName }}
+    namespace: {{ $envAll.Release.Namespace }}
+roleRef:
+  kind: ClusterRole
+  name: {{ $serviceAccountName }}
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ $serviceAccountName }}
+  labels:
+{{ tuple $envAll "ceph-upgrade" "post-apply" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
+  annotations:
+    {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
+spec:
+  template:
+    metadata:
+      labels:
+{{ tuple $envAll "ceph-upgrade" "post-apply" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
+      annotations:
+        configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
+{{ dict "envAll" $envAll "podName" "ceph-mon-post-apply" "containerNames" (list "ceph-mon-post-apply" "init" ) | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 8 }}
+    spec:
+{{ dict "envAll" $envAll "application" "post_apply" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
+      serviceAccountName: {{ $serviceAccountName }}
+      restartPolicy: OnFailure
+      nodeSelector:
+        {{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value }}
+      initContainers:
+{{ tuple $envAll "post-apply" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
+      containers:
+        - name: ceph-mon-post-apply
+{{ tuple $envAll "ceph_config_helper" | include "helm-toolkit.snippets.image" | indent 10 }}
+{{ tuple $envAll $envAll.Values.pod.resources.jobs.bootstrap | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
+{{ dict "envAll" $envAll "application" "post_apply" "container" "ceph_mon_post_apply" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
+          env:
+            - name: CLUSTER
+              value: "ceph"
+            - name: CEPH_NAMESPACE
+              value: {{ .Release.Namespace }}
+            - name: RELEASE_GROUP_NAME
+              value: {{ .Release.Name }}
+          command:
+            - /tmp/post-apply.sh
+          volumeMounts:
+            - name: pod-tmp
+              mountPath: /tmp
+            - name: pod-etc-ceph
+              mountPath: /etc/ceph
+            - name: ceph-mon-bin
+              mountPath: /tmp/post-apply.sh
+              subPath: post-apply.sh
+              readOnly: true
+            - name: ceph-mon-bin
+              mountPath: /tmp/wait-for-pods.sh
+              subPath: wait-for-pods.sh
+              readOnly: true
+            - name: ceph-mon-etc
+              mountPath: /etc/ceph/ceph.conf
+              subPath: ceph.conf
+              readOnly: true
+            - name: ceph-mon-admin-keyring
+              mountPath: /etc/ceph/ceph.client.admin.keyring
+              subPath: ceph.client.admin.keyring
+              readOnly: true
+      volumes:
+        - name: pod-tmp
+          emptyDir: {}
+        - name: pod-etc-ceph
+          emptyDir: {}
+        - name: ceph-mon-bin
+          configMap:
+            name: {{ printf "%s-%s" $envAll.Release.Name "bin" | quote }}
+            defaultMode: 0555
+        - name: ceph-mon-etc
+          configMap:
+            name: {{ printf "%s-%s" $envAll.Release.Name "etc" | quote }}
+            defaultMode: 0444
+        - name: ceph-mon-admin-keyring
+          secret:
+            secretName: {{ .Values.secrets.keyrings.admin }}
+{{- end }}
+{{- end }}
diff --git a/ceph-mon/values.yaml b/ceph-mon/values.yaml
index 172069377..34cdc6863 100644
--- a/ceph-mon/values.yaml
+++ b/ceph-mon/values.yaml
@@ -111,6 +111,13 @@ pod:
         ceph-osd-keyring-generator:
           allowPrivilegeEscalation: false
           readOnlyRootFilesystem: true
+    post_apply:
+      pod:
+        runAsUser: 65534
+      container:
+        ceph_mon_post_apply:
+          allowPrivilegeEscalation: false
+          readOnlyRootFilesystem: true
   dns_policy: "ClusterFirstWithHostNet"
   replicas:
     mgr: 2
@@ -452,6 +459,7 @@ manifests:
   job_image_repo_sync: true
   job_bootstrap: true
   job_keyring: true
+  job_post_apply: true
   service_mon: true
   service_mgr: true
   service_mon_discovery: true
diff --git a/ceph-mon/values_overrides/apparmor.yaml b/ceph-mon/values_overrides/apparmor.yaml
index e6aeea56e..fc93e3203 100644
--- a/ceph-mon/values_overrides/apparmor.yaml
+++ b/ceph-mon/values_overrides/apparmor.yaml
@@ -30,6 +30,8 @@ pod:
     ceph-osd-keyring-generator:
       ceph-osd-keyring-generator: runtime/default
       init: runtime/default
+    ceph-mon-post-apply:
+      ceph-mon-post-apply: runtime/default
 bootstrap:
   enabled: true
 manifests:
diff --git a/releasenotes/notes/ceph-mon.yaml b/releasenotes/notes/ceph-mon.yaml
index 389eb9c40..202c160b4 100644
--- a/releasenotes/notes/ceph-mon.yaml
+++ b/releasenotes/notes/ceph-mon.yaml
@@ -19,4 +19,5 @@ ceph-mon:
   - 0.1.16 Correct Ceph Mon Check Ports
   - 0.1.17 Skip monmap endpoint check for missing mons
   - 0.1.18 Move ceph-mgr deployment to the ceph-mon chart
+  - 0.1.19 Add a post-apply job to restart mons after mgrs
 ...