[ceph-client] update logic of inactive pgs check

This patch set updates the wait_for_inactive_pgs function:
- Renamed the function to wait_for_pgs
- Added a jq query that reports the state of the pgs
- Now requires all pgs to be in an "active+" state for at least three consecutive checks

Change-Id: Iecc79ebbdfaa74886bca989b23f7741a1c3dca16
This commit is contained in:
Kabanov, Dmitrii 2020-07-28 20:17:15 -07:00
parent 3ce0170da8
commit f6d6ae051d

View File

@ -29,21 +29,31 @@ if [[ ! -e ${ADMIN_KEYRING} ]]; then
exit 1
fi
# NOTE(review): this span appears to interleave the pre-change function
# (wait_for_inactive_pgs and the grep-based loops) with its replacement
# (wait_for_pgs and the jq-based loop) -- presumably a diff rendered without
# its +/- markers. Confirm which lines are live before treating it as
# runnable shell; as shown, the first function header is never closed.
function wait_for_inactive_pgs () {
echo "#### Start: Checking for inactive pgs ####"
# wait_for_pgs: poll `ceph pg ls` until three consecutive polls report no PG
# whose state does not start with "active+".
# Globals:   CLUSTER (read); pgs_ready, query, pgs_state (written, not local)
# Arguments: none
# Exits:     1 if a poll finds a non-"active+" state that does not contain
#            "peering"
# Calls the external commands ceph, jq, egrep and sleep.
function wait_for_pgs () {
echo "#### Start: Checking pgs ####"
# Count of consecutive polls that found no non-active PG states.
pgs_ready=0
# jq filter: group the PG entries by .state and emit one {state, count} object
# for every state that does not start with "active+".
query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)'
# egrep -q prints nothing, so the command substitution yields only the exit
# status printed by `echo $?`; 0 means the version output mentions "nautilus".
# NOTE(review): this probe omits --cluster ${CLUSTER} and leaves mon.* unquoted,
# unlike the pg ls calls below -- confirm that is intended.
if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
# In that case the filter is applied to the .pg_stats key rather than to the
# top-level JSON value (presumably the PG array is nested there -- confirm).
query=".pg_stats | ${query}"
fi
# Loop until all pgs are active
# NOTE(review): the two grep-based loops below look like the old implementation
# that the jq-based loop further down replaces. Each sleeps 3 seconds for as
# long as the plain-text `pg ls` output, minus its first line (and, in the
# nautilus branch, minus its last two lines), has a line without "active+".
if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
while [[ `ceph --cluster ${CLUSTER} pg ls | tail -n +2 | head -n -2 | grep -v "active+"` ]]
do
sleep 3
done
else
while [[ `ceph --cluster ${CLUSTER} pg ls | tail -n +2 | grep -v "active+"` ]]
do
sleep 3
done
fi
# Keep polling until three polls in a row come back clean.
while [[ $pgs_ready -lt 3 ]]; do
# One compact JSON object per non-active state; empty when the filter matches
# nothing.
# NOTE(review): an empty result is also what a failed ceph call that prints
# nothing would produce, which would then be counted as a clean poll unless
# the script's shell options abort first -- confirm.
pgs_state=$(ceph --cluster ${CLUSTER} pg ls -f json | jq -c "${query}")
# Any output here means at least one non-active state lacks "peering".
if [[ $(jq -c '. | select(.state | contains("peering") | not)' <<< "${pgs_state}") ]]; then
# If inactive PGs aren't peering, fail
echo "Failure, found inactive PGs that aren't peering"
exit 1
fi
# Remaining non-active states (all peering at this point) reset the streak;
# an empty result extends it.
if [[ "${pgs_state}" ]]; then
pgs_ready=0
else
(( pgs_ready+=1 ))
fi
# The pause is unconditional, so it also runs after the third clean poll.
sleep 3
done
}
function check_recovery_flags () {
@ -265,5 +275,5 @@ manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_
ceph --cluster "${CLUSTER}" osd crush tunables {{ .Values.conf.pool.crush.tunables }}
{{- end }}
# NOTE(review): both the old call name (wait_for_inactive_pgs) and the new one
# (wait_for_pgs) appear here; presumably only wait_for_pgs should remain after
# the rename -- confirm.
wait_for_inactive_pgs
wait_for_pgs
# Runs after the PG wait; check_recovery_flags is defined earlier in this file.
check_recovery_flags