From bcfb26840b005e3fe3790692b4b63282c1dbb1e9 Mon Sep 17 00:00:00 2001 From: Felipe Sanches Zanoni Date: Tue, 13 Aug 2024 10:53:50 -0300 Subject: [PATCH] Adjust Ceph monitoring for multiple monitors on same host When locking/unlocking the standby controller, some core dumps were wrongly generated for the mon.controller process. The problem is in the Ceph init script when checking for a hung process. The regular expression used to filter the output of the ceph health detail command when getting the status of the monitors was expecting only one monitor per host. With the new regular expression, the script now correctly checks the status of each monitor. Story: 2011122 Task: 50824 Test-Plan: PASS: Install AIO-DX and AIO-DX+, lock the standby controller and check there is no file named hang_trace_mon* in the /var/log/ceph directory. Signed-off-by: Felipe Sanches Zanoni Change-Id: I23fd0fd92f0639fd81baac7521e95384b8e384b5 --- ceph/ceph/debian/deb_folder/ceph-base.ceph.init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ceph/ceph/debian/deb_folder/ceph-base.ceph.init b/ceph/ceph/debian/deb_folder/ceph-base.ceph.init index b5d260408..21b1c05c3 100755 --- a/ceph/ceph/debian/deb_folder/ceph-base.ceph.init +++ b/ceph/ceph/debian/deb_folder/ceph-base.ceph.init @@ -375,7 +375,7 @@ is_process_hung() { elif [ "$type" = "mon" ]; then # Get monitor status info local mon_status=$UP - echo "$CEPH_HEALTH_DETAIL" | grep -q -e "^[[:space:]]*$name.*down" + echo "$CEPH_HEALTH_DETAIL" | grep -q -e "^[[:space:]]*$name[[:space:]].*down" if [ $? -eq 0 ]; then mon_status=$DOWN fi