From 82c22ff33f72dd90c33059c5a59753c5210696e1 Mon Sep 17 00:00:00 2001 From: Luis Eduardo Bonatti Date: Tue, 27 Aug 2024 18:43:44 -0300 Subject: [PATCH] Manual recovery for USM states During a USM upgrade, the user can be locked in the from-release if a reboot occurs after the deploy host command completes, because the host state will be deployed while the host is still running release N-1. This commit adds changes that allow the user to move the host state from deployed to failed, and the deploy state from host/host-done to host-failed, using the software-deploy-update script. Test Plan: PASS: Finish activate for major release deployment. PASS: Software deploy abort after activate. PASS: Move the host state from deployed to failed using script. (software-deploy-update -t 'failed' -h 'controller-1' 'admin') PASS: Move the deploy state from host to host-failed using script. (software-deploy-update -s 'host-failed' 'admin') Story: 2010676 Task: 50923 Change-Id: I224f85a83d5d71a663becea42727c64071da26bb Signed-off-by: Luis Eduardo Bonatti --- software/software/deploy_host_state.py | 7 ++++++- software/software/deploy_state.py | 3 ++- software/software/software_controller.py | 21 ++++++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/software/software/deploy_host_state.py b/software/software/deploy_host_state.py index 62ccce47..782844d4 100644 --- a/software/software/deploy_host_state.py +++ b/software/software/deploy_host_state.py @@ -19,7 +19,8 @@ deploy_host_state_transition = { DEPLOY_HOST_STATES.DEPLOYING: [DEPLOY_HOST_STATES.DEPLOYED, DEPLOY_HOST_STATES.FAILED], DEPLOY_HOST_STATES.FAILED: [DEPLOY_HOST_STATES.DEPLOYING, DEPLOY_HOST_STATES.ROLLBACK_DEPLOYED, DEPLOY_HOST_STATES.ROLLBACK_PENDING, DEPLOY_HOST_STATES.FAILED], - DEPLOY_HOST_STATES.DEPLOYED: [DEPLOY_HOST_STATES.ROLLBACK_PENDING], + DEPLOY_HOST_STATES.DEPLOYED: [DEPLOY_HOST_STATES.ROLLBACK_PENDING, + DEPLOY_HOST_STATES.FAILED], # manual recovery scenario DEPLOY_HOST_STATES.ROLLBACK_PENDING: 
[DEPLOY_HOST_STATES.ROLLBACK_DEPLOYING], DEPLOY_HOST_STATES.ROLLBACK_DEPLOYING: [DEPLOY_HOST_STATES.ROLLBACK_DEPLOYED, DEPLOY_HOST_STATES.ROLLBACK_FAILED], @@ -104,6 +105,10 @@ class DeployHostState(object): else: self.transform(DEPLOY_HOST_STATES.ROLLBACK_FAILED) + def failed(self): + """Transform deploy host state to failed without rollback logic.""" + self.transform(DEPLOY_HOST_STATES.FAILED) + def abort(self): state = self.get_deploy_host_state() if state == DEPLOY_HOST_STATES.PENDING: diff --git a/software/software/deploy_state.py b/software/software/deploy_state.py index f4e2e5d6..2aeabddd 100644 --- a/software/software/deploy_state.py +++ b/software/software/deploy_state.py @@ -32,7 +32,8 @@ deploy_state_transition = { DEPLOY_STATES.HOST_FAILED, DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.HOST_ROLLBACK], - DEPLOY_STATES.HOST_DONE: [DEPLOY_STATES.ACTIVATE, DEPLOY_STATES.HOST_ROLLBACK], + DEPLOY_STATES.HOST_DONE: [DEPLOY_STATES.ACTIVATE, DEPLOY_STATES.HOST_ROLLBACK, + DEPLOY_STATES.HOST_FAILED], # manual recovery scenario # deploy host rollback DEPLOY_STATES.HOST_ROLLBACK: [DEPLOY_STATES.HOST_ROLLBACK, diff --git a/software/software/software_controller.py b/software/software/software_controller.py index 1c766f80..a18bd941 100644 --- a/software/software/software_controller.py +++ b/software/software/software_controller.py @@ -779,7 +779,7 @@ class SWMessageDeployStateChanged(messages.PatchMessage): self.valid = True self.agent = None - valid_agents = ['deploy-start', 'deploy-activate', 'deploy-activate-rollback'] + valid_agents = ['deploy-start', 'deploy-activate', 'deploy-activate-rollback', 'admin'] if 'agent' in data: self.agent = data['agent'] else: @@ -797,7 +797,8 @@ class SWMessageDeployStateChanged(messages.PatchMessage): DEPLOY_STATES.ACTIVATE_FAILED.value: DEPLOY_STATES.ACTIVATE_FAILED, DEPLOY_STATES.ACTIVATE_DONE.value: DEPLOY_STATES.ACTIVATE_DONE, DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE.value: DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE, - 
DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED.value: DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED + DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED.value: DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED, + DEPLOY_STATES.HOST_FAILED.value: DEPLOY_STATES.HOST_FAILED } if 'deploy-state' in data and data['deploy-state']: deploy_state = data['deploy-state'] @@ -814,7 +815,7 @@ class SWMessageDeployStateChanged(messages.PatchMessage): self.hostname = data['hostname'] if 'host-state' in data and data['host-state']: - host_state = data['host-state'] + host_state = states.DEPLOY_HOST_STATES(data['host-state']) if host_state not in states.VALID_HOST_DEPLOY_STATE: LOG.error("%s received from %s with invalid host-state %s" % (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, host_state)) @@ -2633,7 +2634,8 @@ class PatchController(PatchService): DEPLOY_STATES.ACTIVATE_DONE: deploy_state.activate_done, DEPLOY_STATES.ACTIVATE_FAILED: deploy_state.activate_failed, DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE: deploy_state.activate_rollback_done, - DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED: deploy_state.activate_rollback_failed + DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED: deploy_state.activate_rollback_failed, + DEPLOY_STATES.HOST_FAILED: deploy_state.deploy_host_failed } if new_state in state_event: state_event[new_state]() @@ -2643,7 +2645,16 @@ class PatchController(PatchService): def host_deploy_state_changed(self, hostname, host_deploy_state): '''Handle 'host deploy state change' event. ''' - self.db_api_instance.update_deploy_host(hostname, host_deploy_state) + + deploy_host_state = DeployHostState(hostname) + state_event = { + DEPLOY_HOST_STATES.FAILED: deploy_host_state.failed + } + if host_deploy_state in state_event: + state_event[host_deploy_state]() + else: + msg = f"Received invalid deploy host state update {host_deploy_state}" + LOG.error(msg) def add_text_tag_to_xml(self, parent, name, text): tag = ET.SubElement(parent, name)