From 21236a3d4bf8360af9243d87af10c797f591fe51 Mon Sep 17 00:00:00 2001 From: Felipe Sanches Zanoni Date: Sat, 17 Dec 2022 14:07:43 -0500 Subject: [PATCH] Ceph-manager: Change timeout usage for ceph commands Ceph-manager is using cephclient to get information from ceph and the command 'ceph restful list-keys --connect-timeout 15' is timing out and crashing, generating core dumps. Removed the '--connect-timeout' parameter and added the 'timeout' argument to the check_output calls. With this change no core dumps were seen. Test Plan: PASS: Fresh install AIO-SX, stop ceph services, watch ceph-manager.log file looking for timed out commands and verify if there is any core dump from ceph. Closes-bug: 1999985 Signed-off-by: Felipe Sanches Zanoni Change-Id: Ie119cacd2409de07ed700fd554df03d4187a252d Signed-off-by: Erickson Silva de Oliveira --- .../python-cephclient/cephclient/client.py | 26 ++++++++++--------- .../python-cephclient/cephclient/exception.py | 3 +++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/ceph/python-cephclient/python-cephclient/cephclient/client.py b/ceph/python-cephclient/python-cephclient/cephclient/client.py index da085db4..f06291a7 100644 --- a/ceph/python-cephclient/python-cephclient/cephclient/client.py +++ b/ceph/python-cephclient/python-cephclient/cephclient/client.py @@ -69,12 +69,13 @@ class CephClient(object): def _get_password(self): try: output = subprocess.check_output( - ('ceph restful list-keys ' - '--connect-timeout {}').format( - CEPH_CLI_TIMEOUT_SEC), + 'ceph restful list-keys', + timeout=CEPH_CLI_TIMEOUT_SEC, shell=True) except subprocess.CalledProcessError as e: raise exception.CephMonRestfulListKeysError(str(e)) + except subprocess.TimeoutExpired as e: + raise exception.CephCliTimeoutExpired(str(e)) try: keys = json.loads(output) except (KeyError, ValueError): @@ -89,12 +90,13 @@ class CephClient(object): while attempts <= CEPH_GET_SERVICE_RETRY_COUNT: try: output = subprocess.check_output( - ('ceph mgr services ' -
'--connect-timeout {}').format( - CEPH_CLI_TIMEOUT_SEC), + 'ceph mgr services', + timeout=CEPH_CLI_TIMEOUT_SEC, shell=True) except subprocess.CalledProcessError as e: raise exception.CephMgrDumpError(str(e)) + except subprocess.TimeoutExpired as e: + raise exception.CephCliTimeoutExpired(str(e)) try: status = json.loads(output) if not status: @@ -115,12 +117,13 @@ class CephClient(object): def _get_service_hostname(self): try: output = subprocess.check_output( - ('ceph mgr metadata ' - '--connect-timeout {}').format( - CEPH_CLI_TIMEOUT_SEC), + 'ceph mgr metadata', + timeout=CEPH_CLI_TIMEOUT_SEC, shell=True) except subprocess.CalledProcessError as e: raise exception.CephMgrDumpError(str(e)) + except subprocess.TimeoutExpired as e: + raise exception.CephCliTimeoutExpired(str(e)) try: status = json.loads(output) except (KeyError, ValueError): @@ -133,12 +136,12 @@ class CephClient(object): try: certificate = subprocess.check_output( ('ceph config-key get ' - '--connect-timeout {} ' 'mgr/restful/{}/crt').format( - CEPH_CLI_TIMEOUT_SEC, hostname), + hostname), + timeout=CEPH_CLI_TIMEOUT_SEC, shell=True) - except subprocess.CalledProcessError: + except (subprocess.CalledProcessError, subprocess.TimeoutExpired): return with tempfile.NamedTemporaryFile(delete=False) as self.cert_file: self.cert_file.write(certificate) diff --git a/ceph/python-cephclient/python-cephclient/cephclient/exception.py b/ceph/python-cephclient/python-cephclient/cephclient/exception.py index c1d754dd..4048c857 100644 --- a/ceph/python-cephclient/python-cephclient/cephclient/exception.py +++ b/ceph/python-cephclient/python-cephclient/cephclient/exception.py @@ -99,3 +99,6 @@ class CephClientNoSuchUser(CephClientException): class CephClientIncorrectPassword(CephClientException): message = ("Incorrect password for user '{user}'.") + +class CephCliTimeoutExpired(CephClientException): + message = "Timeout was reached while executing the command. {}"