From ddd9410d2a4fdb826a7ac8bf9f48b84b51d7c292 Mon Sep 17 00:00:00 2001 From: Heitor Matsui Date: Fri, 8 Nov 2024 12:28:34 -0300 Subject: [PATCH] Restrict ceph mon hook execution This commit adds a condition to run the ceph mon removal hook only when rolling back to stx-8, which is the only scenario where it is needed, and it causes issues if run in other upgrade paths. This commit also adds a TODO reminder to remove this hook in the future when it is not needed anymore. Test Plan (only affects AIO-DX) PASS: AIO-DX - stx-10 -> stx-8 rollback successful PASS: AIO-DX - stx-11 -> stx-10 rollback successful Story: 2010676 Task: 51307 Change-Id: I5447deaa202fd78e983be26a9a230848bc2bc04e Signed-off-by: Heitor Matsui --- software/software/agent_hooks.py | 52 +++++++++++++++++++------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/software/software/agent_hooks.py b/software/software/agent_hooks.py index 9cdc87d2..cf1b6c60 100644 --- a/software/software/agent_hooks.py +++ b/software/software/agent_hooks.py @@ -304,34 +304,44 @@ class RemoveKubernetesConfigSymlinkHook(BaseHook): raise +# TODO(heitormatsui): delete in the future, not needed for stx-10 -> class RemoveCephMonHook(BaseHook): """ Remove additional ceph-mon added for each controller """ PMON_FILE = "/etc/pmon.d/ceph-fixed-mon.conf" + def __init__(self, attrs): + super().__init__(attrs) + self._major_release = None + if "major_release" in attrs: + self._major_release = attrs.get("major_release") + def run(self): - system_type = utils.get_platform_conf("system_type") - system_mode = utils.get_platform_conf("system_mode") - nodetype = utils.get_platform_conf("nodetype") - # additional monitors were added only for AIO-DX - if (system_type == constants.SYSTEM_TYPE_ALL_IN_ONE and - system_mode != constants.SYSTEM_MODE_SIMPLEX and - nodetype == constants.CONTROLLER): - cmd_remove_mon_controller_0 = ["timeout", "30", "ceph", "mon", "rm", "controller-0"] - cmd_remove_mon_controller_1 = ["timeout", 
"30", "ceph", "mon", "rm", "controller-1"] - try: - subprocess.check_call(cmd_remove_mon_controller_0) - subprocess.check_call(cmd_remove_mon_controller_1) - LOG.info("Removed mon.controller-0 and mon.controller-1 from ceph cluster.") - except subprocess.CalledProcessError as e: - LOG.exception("Failure removing mon.controller-0 and mon.controller-1 from ceph cluster: %s" % str(e)) - raise - try: - os.unlink(self.PMON_FILE) - LOG.info("Removed %s from pmon." % self.PMON_FILE) - except FileNotFoundError: - pass # ignore if link doesn't exist + # (DX only) on 22.12 there is 1 mon, on 24.09 there are 3 + # so only in 24.09 -> 22.12 rollback this hook is needed + if self._major_release == "22.12": + system_type = utils.get_platform_conf("system_type") + system_mode = utils.get_platform_conf("system_mode") + nodetype = utils.get_platform_conf("nodetype") + # additional monitors were added only for AIO-DX + if (system_type == constants.SYSTEM_TYPE_ALL_IN_ONE and + system_mode != constants.SYSTEM_MODE_SIMPLEX and + nodetype == constants.CONTROLLER): + cmd_remove_mon_controller_0 = ["timeout", "30", "ceph", "mon", "rm", "controller-0"] + cmd_remove_mon_controller_1 = ["timeout", "30", "ceph", "mon", "rm", "controller-1"] + try: + subprocess.check_call(cmd_remove_mon_controller_0) + subprocess.check_call(cmd_remove_mon_controller_1) + LOG.info("Removed mon.controller-0 and mon.controller-1 from ceph cluster.") + except subprocess.CalledProcessError as e: + LOG.exception("Failure removing mon.controller-0 and mon.controller-1 from ceph cluster: %s" % str(e)) + raise + try: + os.unlink(self.PMON_FILE) + LOG.info("Removed %s from pmon." % self.PMON_FILE) + except FileNotFoundError: + pass # ignore if link doesn't exist class RestartKubeApiServer(BaseHook):