Improve stability of Ceph cluster deployment script
Do not fail when some of the mon pods are not found while checking their status; retry instead. This is to avoid situations like the following:

```
2025-07-10 14:53:17.670728 | primary | + MON_PODS='rook-ceph-mon-a-canary-6d7bf54997-mtzmt
2025-07-10 14:53:17.670767 | primary | rook-ceph-mon-b-canary-7ff47b6fc6-sbtjh
2025-07-10 14:53:17.670781 | primary | rook-ceph-mon-c-canary-68cf8fb595-4jptf'
2025-07-10 14:53:17.670786 | primary | + for MON_POD in $MON_PODS
2025-07-10 14:53:17.670791 | primary | + kubectl get pod --namespace=ceph rook-ceph-mon-a-canary-6d7bf54997-mtzmt
2025-07-10 14:53:17.824501 | primary | + kubectl wait --namespace=ceph --for=condition=ready pod/rook-ceph-mon-a-canary-6d7bf54997-mtzmt --timeout=600s
2025-07-10 14:53:17.897216 | primary | Error from server (NotFound): pods "rook-ceph-mon-a-canary-6d7bf54997-mtzmt" not found
```

Change-Id: I7f10df4d9b395a5775aa3afd42e17dbd09855304
Signed-off-by: Vladimir Kozhukalov <kozhukalov@gmail.com>
This commit is contained in:
@@ -371,13 +371,25 @@ helm osh wait-for-pods rook-ceph
|
||||
kubectl wait --namespace=ceph --for=condition=ready pod --selector=app=rook-ceph-tools --timeout=600s
|
||||
|
||||
# Wait for all monitor pods to be ready
|
||||
MON_PODS=$(kubectl get pods --namespace=ceph --selector=app=rook-ceph-mon --no-headers | awk '{ print $1 }')
|
||||
for MON_POD in $MON_PODS; do
|
||||
if kubectl get pod --namespace=ceph "$MON_POD" > /dev/null 2>&1; then
|
||||
kubectl wait --namespace=ceph --for=condition=ready "pod/$MON_POD" --timeout=600s
|
||||
else
|
||||
echo "Pod $MON_POD not found, skipping..."
|
||||
fi
|
||||
# Poll for up to 1800 seconds until every pod selected by app=rook-ceph-mon
# in the "ceph" namespace reports the Ready condition.
#
# The pod list is re-read on every pass, and a pod that has vanished between
# the listing and the check is reported and skipped rather than treated as a
# fatal error. A pass only counts as success when at least one pod was listed
# and all listed pods became Ready; an empty listing triggers another pass
# instead of being mistaken for "all ready".
wait_start_time=$(date +%s)
mon_pods_all_ready=false
while (( $(date +%s) - wait_start_time < 1800 )); do
    # Pause before each pass (including the first one).
    sleep 30
    MON_PODS=$(kubectl get pods --namespace=ceph --selector=app=rook-ceph-mon --no-headers | awk '{ print $1 }')
    MON_PODS_NUM=0
    MON_PODS_READY=0
    # Word splitting of $MON_PODS is intentional: it holds one pod name per
    # line as printed by awk above.
    for MON_POD in $MON_PODS; do
        MON_PODS_NUM=$((MON_PODS_NUM + 1))
        if ! kubectl get pod --namespace=ceph "$MON_POD" > /dev/null 2>&1; then
            echo "Pod $MON_POD not found, skipping..."
        elif kubectl wait --namespace=ceph --for=condition=ready "pod/$MON_POD" --timeout=60s; then
            MON_PODS_READY=$((MON_PODS_READY + 1))
        else
            echo "Pod $MON_POD not ready, skipping..."
        fi
    done
    # Numeric comparison; require a non-empty pod list so that "0 of 0"
    # is never reported as ready.
    if (( MON_PODS_NUM > 0 && MON_PODS_READY == MON_PODS_NUM )); then
        echo "Monitor pods are ready. Moving on."
        mon_pods_all_ready=true
        break
    fi
done

# Execution still continues after the deadline, as before; the timeout is
# only made visible in the log.
if [[ "$mon_pods_all_ready" != true ]]; then
    echo "WARNING: monitor pods did not all become ready within 1800 seconds" >&2
fi
|
||||
|
||||
echo "=========== CEPH K8S PODS LIST ============"
|
||||
|
Reference in New Issue
Block a user