[ceph-mon] Add a post-apply job to restart mons after mgrs
If the OnDelete pod restart strategy is used for the ceph-mon daemonset, run a post-apply job to restart the ceph-mon pods one at a time. Otherwise the mons could restart before the mgrs, which can be problematic in some upgrade scenarios. Change-Id: I57f87130e95088217c3cfe73512caaae41d3ef22
This commit is contained in:
parent
ea2c0115c4
commit
ae17a61836
@ -15,6 +15,6 @@ apiVersion: v1
|
|||||||
appVersion: v1.0.0
|
appVersion: v1.0.0
|
||||||
description: OpenStack-Helm Ceph Mon
|
description: OpenStack-Helm Ceph Mon
|
||||||
name: ceph-mon
|
name: ceph-mon
|
||||||
version: 0.1.18
|
version: 0.1.19
|
||||||
home: https://github.com/ceph/ceph
|
home: https://github.com/ceph/ceph
|
||||||
...
|
...
|
||||||
|
132
ceph-mon/templates/bin/_post-apply.sh.tpl
Normal file
132
ceph-mon/templates/bin/_post-apply.sh.tpl
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/}}
|
||||||
|
|
||||||
|
# Force the C locale for every command started by this script.
export LC_ALL=C

# Every path below is derived from CLUSTER, so fail early with a clear
# message instead of complaining about a missing "/etc/ceph/.conf".
: "${CLUSTER:?CLUSTER must be set to the name of the Ceph cluster}"

# ADMIN_KEYRING may be supplied by the environment; otherwise default to the
# admin keyring of ${CLUSTER}.
: "${ADMIN_KEYRING:=/etc/ceph/${CLUSTER}.client.admin.keyring}"

# The ceph CLI needs both the cluster config and the admin keyring.
if [[ ! -f "/etc/ceph/${CLUSTER}.conf" ]]; then
  echo "ERROR- /etc/ceph/${CLUSTER}.conf must exist; get it from your existing mon"
  exit 1
fi

if [[ ! -f "${ADMIN_KEYRING}" ]]; then
  echo "ERROR- ${ADMIN_KEYRING} must exist; get it from your existing mon"
  exit 1
fi

# Log the cluster status before any monitor is touched.
ceph --cluster "${CLUSTER}" -s
|
||||||
|
#######################################
# Poll Kubernetes until every container named "ceph-mon" in the pods
# labelled component=mon reports ready, or abort the script on timeout.
# Globals:
#   CEPH_NAMESPACE (read) - used as the namespace when $1 is not given
# Arguments:
#   $1 - namespace to query (default: ${CEPH_NAMESPACE})
#   $2 - timeout in seconds (default: 1800)
# Outputs:
#   On timeout: an error message and the current mon pod listing (stderr).
# Returns:
#   0 once all mon containers are ready. Exits the whole script with
#   status 1 when the timeout expires.
#######################################
function wait_for_pods() {
  # Fall back to CEPH_NAMESPACE so callers that pass no argument still query
  # the intended namespace instead of --namespace="".
  local namespace=${1:-${CEPH_NAMESPACE}}
  local timeout=${2:-1800}
  local end count_pods min_mons state mon_state

  end=$(( $(date -u +%s) + timeout ))

  # Select the containers named "ceph-mon" and count them grouped by their
  # "ready" field, yielding objects such as {"true": 3} and {"false": 1}.
  count_pods='.items | map(.status.containerStatuses | .[] | select(.name=="ceph-mon")) | group_by(.ready) | map({(.[0].ready | tostring): length}) | .[]'
  # Merge those objects and pass only if the ready count covers all of them.
  min_mons='add | if .true >= (.false + .true) then "pass" else "fail" end'

  while true; do
    # Leave the loop as soon as all mons are ready.
    state=$(kubectl get pods --namespace="${namespace}" -l component=mon -o json | jq "${count_pods}")
    mon_state=$(jq -s "${min_mons}" <<< "${state}")
    if [[ "${mon_state}" == '"pass"' ]]; then
      break
    fi
    sleep 5

    if (( $(date -u +%s) > end )); then
      echo -e "Containers failed to start after ${timeout} seconds\n" >&2
      kubectl get pods --namespace "${namespace}" -o wide -l component=mon >&2
      exit 1
    fi
  done
}
|
||||||
|
|
||||||
|
#######################################
# Verify that every DaemonSet labelled component=mon is fully rolled out.
# A DaemonSet counts as ready when currentNumberScheduled,
# desiredNumberScheduled, numberAvailable, numberReady and
# updatedNumberScheduled in its status are all the same value.
# Globals:
#   CEPH_NAMESPACE (read) - namespace holding the mon DaemonSets
# Arguments:
#   None
# Outputs:
#   One status line per DaemonSet on stdout.
# Returns:
#   0 when all DaemonSets are ready. Exits the whole script with status 1
#   as soon as one DaemonSet is not ready, so the job is marked as failed.
#######################################
function check_ds() {
  local ds ds_status distinct_counts

  # DaemonSet names contain no whitespace, so word-splitting the name column
  # is intentional here.
  for ds in $(kubectl get ds --namespace="${CEPH_NAMESPACE}" -l component=mon --no-headers=true | awk '{print $1}'); do
    ds_status=$(kubectl get ds -n "${CEPH_NAMESPACE}" "${ds}" -o json | jq -r .status)

    # A status without numberAvailable is treated as "no mon pods here".
    if ! grep -qi "numberAvailable" <<< "${ds_status}"; then
      echo "There are no mons under daemonset ${ds}"
      continue
    fi

    # Extract all five counters in one jq call; they agree exactly when
    # only one distinct value remains after de-duplication.
    distinct_counts=$(jq -r '.currentNumberScheduled, .desiredNumberScheduled, .numberAvailable, .numberReady, .updatedNumberScheduled' <<< "${ds_status}" \
      | sort -u | wc -l)

    if (( distinct_counts != 1 )); then
      echo "Some pods in daemonset ${ds} are not ready"
      # Explicit non-zero status: a bare "exit" would return the status of
      # the echo above (0) and report success.
      exit 1
    fi
    echo "All pods in daemonset ${ds} are ready"
  done
}
|
||||||
|
|
||||||
|
#######################################
# Restart the ceph-mon pods one at a time: delete a pod, wait for all mons
# to become ready again, print the cluster status, then move to the next.
# Globals:
#   CEPH_NAMESPACE (read) - namespace holding the mon pods
#   CLUSTER        (read) - Ceph cluster name passed to the ceph CLI
# Arguments:
#   None
# Outputs:
#   Progress messages and "ceph -s" output on stdout.
# Returns:
#   Status of the last command run. wait_for_pods exits the script on timeout.
#######################################
function restart_mons() {
  local pod mon_pods

  # Snapshot the pod names up front; replacement pods created during the
  # loop are not part of this list and are therefore not restarted again.
  mon_pods=$(kubectl get po -n "${CEPH_NAMESPACE}" -l component=mon --no-headers | awk '{print $1}')

  # Pod names contain no whitespace, so the unquoted expansion is intentional.
  for pod in ${mon_pods}; do
    echo "Restarting pod ${pod}"
    kubectl delete pod -n "${CEPH_NAMESPACE}" "${pod}"

    echo "Waiting for the pod ${pod} to restart"
    # The pod will not be ready in the first 60 seconds, so waiting here
    # reduces the number of queries sent to Kubernetes.
    sleep 60
    # Pass the namespace explicitly so the readiness check queries the mon
    # namespace rather than an empty one.
    wait_for_pods "${CEPH_NAMESPACE}"
    ceph --cluster "${CLUSTER}" -s
  done
}
|
||||||
|
|
||||||
|
# Make sure the mons are healthy before deciding whether to restart them.
wait_for_pods "${CEPH_NAMESPACE}"

# require_upgrade: number of mon DaemonSets whose pods are not all updated.
# max_release:     highest observedGeneration among those DaemonSets.
require_upgrade=0
max_release=0

# DaemonSet names contain no whitespace, so word-splitting is intentional.
for ds in $(kubectl get ds --namespace="${CEPH_NAMESPACE}" -l component=mon --no-headers=true | awk '{print $1}'); do
  # Fetch the status once and read every field from that single snapshot.
  ds_status=$(kubectl get ds -n "${CEPH_NAMESPACE}" "${ds}" -o json | jq -r .status)
  updatedNumberScheduled=$(jq -r .updatedNumberScheduled <<< "${ds_status}")
  desiredNumberScheduled=$(jq -r .desiredNumberScheduled <<< "${ds_status}")

  # Both sides are quoted so the comparison is a literal string compare.
  if [[ "${updatedNumberScheduled}" != "${desiredNumberScheduled}" ]]; then
    # Only DaemonSets that report numberAvailable are counted.
    if grep -qi "numberAvailable" <<< "${ds_status}"; then
      require_upgrade=$((require_upgrade+1))
      _release=$(jq -r .observedGeneration <<< "${ds_status}")
      max_release=$(( max_release > _release ? max_release : _release ))
    fi
  fi
done

# NOTE(review): the value printed is the DaemonSet observedGeneration, which
# this script uses as the chart "revision" - confirm that is intended.
echo "Latest revision of the helm chart(s) is : $max_release"

# NOTE(review): presumably a generation of 1 means a first install, where
# nothing needs restarting - confirm.
if [[ $max_release -gt 1 ]]; then
  if [[ $require_upgrade -gt 0 ]]; then
    echo "Restart ceph-mon pods one at a time to prevent disruption"
    restart_mons
  fi

  # Check all the ceph-mon daemonsets
  echo "checking DS"
  check_ds
else
  echo "No revisions found for upgrade"
fi
|
@ -29,6 +29,8 @@ data:
|
|||||||
bootstrap.sh: |
|
bootstrap.sh: |
|
||||||
{{ tuple "bin/_bootstrap.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
{{ tuple "bin/_bootstrap.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
post-apply.sh: |
|
||||||
|
{{ tuple "bin/_post-apply.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||||
|
|
||||||
init-dirs.sh: |
|
init-dirs.sh: |
|
||||||
{{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
{{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||||
|
143
ceph-mon/templates/job-post-apply.yaml
Normal file
143
ceph-mon/templates/job-post-apply.yaml
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
{{/*
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/}}
|
||||||
|
|
||||||
|
{{- if eq .Values.pod.lifecycle.upgrades.daemonsets.pod_replacement_strategy "OnDelete" }}
|
||||||
|
{{- if and .Values.manifests.job_post_apply }}
|
||||||
|
{{- $envAll := . }}
|
||||||
|
|
||||||
|
{{- $serviceAccountName := printf "%s-%s" .Release.Name "post-apply" }}
|
||||||
|
{{ tuple $envAll "post-apply" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
|
||||||
|
---
|
||||||
|
# Permissions used by the post-apply job's service account.
# "jobs" is only listed under the "batch" API group: the core ('') group has
# no such resource, so a core-group entry would never match anything.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ $serviceAccountName }}
rules:
  - apiGroups:
      - ''
    resources:
      - pods
      - events
      - pods/exec
    verbs:
      - create
      - get
      - delete
      - list
  - apiGroups:
      - 'apps'
    resources:
      - daemonsets
    verbs:
      - get
      - list
  - apiGroups:
      - 'batch'
    resources:
      - jobs
    verbs:
      - get
      - list
|
||||||
|
---
|
||||||
|
# Binds the ClusterRole of the same name to the post-apply service account
# in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $serviceAccountName }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ $serviceAccountName }}
subjects:
  - kind: ServiceAccount
    name: {{ $serviceAccountName }}
    namespace: {{ $envAll.Release.Namespace }}
|
||||||
|
---
|
||||||
|
# Job that runs /tmp/post-apply.sh from the chart's "bin" ConfigMap with the
# Ceph config and admin keyring mounted under /etc/ceph.
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ $serviceAccountName }}
  labels:
{{ tuple $envAll "ceph-upgrade" "post-apply" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
  annotations:
    {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
spec:
  template:
    metadata:
      labels:
{{ tuple $envAll "ceph-upgrade" "post-apply" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
      annotations:
        configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
{{ dict "envAll" $envAll "podName" "ceph-mon-post-apply" "containerNames" (list "ceph-mon-post-apply" "init" ) | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 8 }}
    spec:
{{ dict "envAll" $envAll "application" "post_apply" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
      serviceAccountName: {{ $serviceAccountName }}
      restartPolicy: OnFailure
      nodeSelector:
        # Label values must be strings; quote so values such as "true" or
        # "1" are not rendered as YAML booleans or integers.
        {{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value | quote }}
      initContainers:
{{ tuple $envAll "post-apply" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
      containers:
        - name: ceph-mon-post-apply
{{ tuple $envAll "ceph_config_helper" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.bootstrap | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
{{ dict "envAll" $envAll "application" "post_apply" "container" "ceph_mon_post_apply" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
          env:
            - name: CLUSTER
              value: "ceph"
            # Env values must be strings; quote so an all-digit namespace or
            # release name is not rendered as a YAML integer.
            - name: CEPH_NAMESPACE
              value: {{ .Release.Namespace | quote }}
            - name: RELEASE_GROUP_NAME
              value: {{ .Release.Name | quote }}
          command:
            - /tmp/post-apply.sh
          volumeMounts:
            - name: pod-tmp
              mountPath: /tmp
            - name: pod-etc-ceph
              mountPath: /etc/ceph
            - name: ceph-mon-bin
              mountPath: /tmp/post-apply.sh
              subPath: post-apply.sh
              readOnly: true
            # NOTE(review): post-apply.sh defines its own wait_for_pods and
            # does not appear to call this file - confirm the mount is needed
            # and that the ConfigMap provides a wait-for-pods.sh key.
            - name: ceph-mon-bin
              mountPath: /tmp/wait-for-pods.sh
              subPath: wait-for-pods.sh
              readOnly: true
            - name: ceph-mon-etc
              mountPath: /etc/ceph/ceph.conf
              subPath: ceph.conf
              readOnly: true
            - name: ceph-mon-admin-keyring
              mountPath: /etc/ceph/ceph.client.admin.keyring
              subPath: ceph.client.admin.keyring
              readOnly: true
      volumes:
        - name: pod-tmp
          emptyDir: {}
        - name: pod-etc-ceph
          emptyDir: {}
        - name: ceph-mon-bin
          configMap:
            name: {{ printf "%s-%s" $envAll.Release.Name "bin" | quote }}
            defaultMode: 0555
        - name: ceph-mon-etc
          configMap:
            name: {{ printf "%s-%s" $envAll.Release.Name "etc" | quote }}
            defaultMode: 0444
        - name: ceph-mon-admin-keyring
          secret:
            secretName: {{ .Values.secrets.keyrings.admin }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
@ -111,6 +111,13 @@ pod:
|
|||||||
ceph-osd-keyring-generator:
|
ceph-osd-keyring-generator:
|
||||||
allowPrivilegeEscalation: false
|
allowPrivilegeEscalation: false
|
||||||
readOnlyRootFilesystem: true
|
readOnlyRootFilesystem: true
|
||||||
|
post_apply:
|
||||||
|
pod:
|
||||||
|
runAsUser: 65534
|
||||||
|
container:
|
||||||
|
ceph_mon_post_apply:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
dns_policy: "ClusterFirstWithHostNet"
|
dns_policy: "ClusterFirstWithHostNet"
|
||||||
replicas:
|
replicas:
|
||||||
mgr: 2
|
mgr: 2
|
||||||
@ -452,6 +459,7 @@ manifests:
|
|||||||
job_image_repo_sync: true
|
job_image_repo_sync: true
|
||||||
job_bootstrap: true
|
job_bootstrap: true
|
||||||
job_keyring: true
|
job_keyring: true
|
||||||
|
job_post_apply: true
|
||||||
service_mon: true
|
service_mon: true
|
||||||
service_mgr: true
|
service_mgr: true
|
||||||
service_mon_discovery: true
|
service_mon_discovery: true
|
||||||
|
@ -30,6 +30,8 @@ pod:
|
|||||||
ceph-osd-keyring-generator:
|
ceph-osd-keyring-generator:
|
||||||
ceph-osd-keyring-generator: runtime/default
|
ceph-osd-keyring-generator: runtime/default
|
||||||
init: runtime/default
|
init: runtime/default
|
||||||
|
ceph-mon-post-apply:
|
||||||
|
ceph-mon-post-apply: runtime/default
|
||||||
bootstrap:
|
bootstrap:
|
||||||
enabled: true
|
enabled: true
|
||||||
manifests:
|
manifests:
|
||||||
|
@ -19,4 +19,5 @@ ceph-mon:
|
|||||||
- 0.1.16 Correct Ceph Mon Check Ports
|
- 0.1.16 Correct Ceph Mon Check Ports
|
||||||
- 0.1.17 Skip monmap endpoint check for missing mons
|
- 0.1.17 Skip monmap endpoint check for missing mons
|
||||||
- 0.1.18 Move ceph-mgr deployment to the ceph-mon chart
|
- 0.1.18 Move ceph-mgr deployment to the ceph-mon chart
|
||||||
|
- 0.1.19 Add a post-apply job to restart mons after mgrs
|
||||||
...
|
...
|
||||||
|
Loading…
Reference in New Issue
Block a user