[ceph-client] update logic of inactive pgs check

This patch set updates the wait_for_inactive_pgs function:
- Renamed the function to wait_for_pgs.
- Added a jq query for getting the state of the pgs.
- All pgs must be in an "active+" state at least three times in a row before the wait completes.

Change-Id: Iecc79ebbdfaa74886bca989b23f7741a1c3dca16
2020-07-28 20:17:15 -07:00 · 2020-07-28 20:17:15 -07:00 · f6d6ae051d
commit f6d6ae051d
parent 3ce0170da8
1 changed files with 24 additions and 14 deletions
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@@ -29,21 +29,31 @@ if [[ ! -e ${ADMIN_KEYRING} ]]; then
   exit 1
 fi

-function wait_for_inactive_pgs () {
-  echo "#### Start: Checking for inactive pgs ####"
+function wait_for_pgs () {
+  echo "#### Start: Checking pgs ####"
+
+  pgs_ready=0
+  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)'
+
+  if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
+    query=".pg_stats | ${query}"
+  fi

  # Loop until all pgs are active
-  if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
-    while [[ `ceph --cluster ${CLUSTER} pg ls | tail -n +2 | head -n -2 | grep -v "active+"` ]]
-    do
-      sleep 3
-    done
-  else
-    while [[ `ceph --cluster ${CLUSTER} pg ls | tail -n +2 | grep -v "active+"` ]]
-    do
-      sleep 3
-    done
-  fi
+  while [[ $pgs_ready -lt 3 ]]; do
+    pgs_state=$(ceph --cluster ${CLUSTER} pg ls -f json | jq -c "${query}")
+    if [[ $(jq -c '. | select(.state | contains("peering") | not)' <<< "${pgs_state}") ]]; then
+      # If inactive PGs aren't peering, fail
+      echo "Failure, found inactive PGs that aren't peering"
+      exit 1
+    fi
+    if [[ "${pgs_state}" ]]; then
+      pgs_ready=0
+    else
+      (( pgs_ready+=1 ))
+    fi
+    sleep 3
+  done
 }

 function check_recovery_flags () {
@@ -265,5 +275,5 @@ manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_
 ceph --cluster "${CLUSTER}" osd crush tunables {{ .Values.conf.pool.crush.tunables }}
 {{- end }}

-wait_for_inactive_pgs
+wait_for_pgs
 check_recovery_flags