From ce46cc461d03cd89794cae0663e1d6c73fa084cd Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Fri, 4 Sep 2020 11:33:31 +0200 Subject: [PATCH] Do not assume that prepare_image is the last command to run The get_deploy_steps command can be run after it breaking deploy. Change-Id: I8e641a521a574462010a95a19e8a64ac36d4e52d --- ironic/drivers/modules/agent.py | 44 +++++++++++-------- ironic/drivers/modules/agent_client.py | 39 ++++++++++------ .../tests/unit/drivers/modules/test_agent.py | 28 ++++++++++++ .../notes/agent-uuid-5d86bc18849acda3.yaml | 5 +++ 4 files changed, 84 insertions(+), 32 deletions(-) create mode 100644 releasenotes/notes/agent-uuid-5d86bc18849acda3.yaml diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 8c60c0ad83..1cb380ab21 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -292,25 +292,33 @@ class AgentDeployMixin(agent_base.AgentDeployMixin): # TODO(dtantsur): remove in W def _get_uuid_from_result(self, task, type_uuid): - command = self._client.get_commands_status(task.node)[-1] + command = self._client.get_last_command_status(task.node, + 'prepare_image') + if (not command + or not command.get('command_result', {}).get('result')): + msg = _('Unexpected response from the agent for node %s: the ' + 'running command list does not include prepare_image ' + 'or its result is malformed') % task.node.uuid + LOG.error(msg) + deploy_utils.set_failed_state(task, msg) + return - if command['command_result'] is not None: - words = command['command_result']['result'].split() - for word in words: - if type_uuid in word: - result = word.split('=')[1] - if not result: - msg = (_('Command result did not return %(type_uuid)s ' - 'for node %(node)s. The version of the IPA ' - 'ramdisk used in the deployment might not ' - 'have support for provisioning of ' - 'partition images.') % - {'type_uuid': type_uuid, - 'node': task.node.uuid}) - LOG.error(msg) - deploy_utils.set_failed_state(task, msg) - return - return result + words = command['command_result']['result'].split() + for word in words: + if type_uuid in word: + result = word.split('=')[1] + if not result: + msg = (_('Command result did not return %(type_uuid)s ' + 'for node %(node)s. The version of the IPA ' + 'ramdisk used in the deployment might not ' + 'have support for provisioning of ' + 'partition images.') % + {'type_uuid': type_uuid, + 'node': task.node.uuid}) + LOG.error(msg) + deploy_utils.set_failed_state(task, msg) + return + return result @METRICS.timer('AgentDeployMixin.prepare_instance_boot') @base.deploy_step(priority=60) diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 4cb3495096..1163744259 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -102,23 +102,11 @@ class AgentClient(object): :param method: A string represents the command executed by agent. :raises: AgentCommandTimeout if timeout is reached. """ - try: - method = method.split('.', 1)[1] - except IndexError: - pass - # NOTE(dtantsur): this function uses AgentCommandTimeout on every # failure, but unless the timeout is reached, the exception is caught # and retried by the @retry decorator above. - - commands = self.get_commands_status(node) - try: - result = next(c for c in reversed(commands) - if c.get('command_name') == method) - except StopIteration: - LOG.debug('Command %(cmd)s is not in the executing commands list ' - 'for node %(node)s', - {'cmd': method, 'node': node.uuid}) + result = self.get_last_command_status(node, method) + if result is None: raise exception.AgentCommandTimeout(command=method, node=node.uuid) if result.get('command_status') == 'RUNNING': @@ -312,6 +300,29 @@ class AgentClient(object): {'node': node.uuid, 'status': status}) return result + def get_last_command_status(self, node, method): + """Get the last status for the given command. + + :param node: A Node object. + :param method: Command name. + :returns: A dict containing command status from agent or None + if the command was not found. + """ + try: + method = method.split('.', 1)[1] + except IndexError: + pass + + commands = self.get_commands_status(node) + try: + return next(c for c in reversed(commands) + if c.get('command_name') == method) + except StopIteration: + LOG.debug('Command %(cmd)s is not in the executing commands list ' + 'for node %(node)s', + {'cmd': method, 'node': node.uuid}) + return None + @METRICS.timer('AgentClient.prepare_image') def prepare_image(self, node, image_info, wait=False): """Call the `prepare_image` method on the node. diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index f817779c28..05d27e7ed5 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -1649,6 +1649,34 @@ class TestAgentDeploy(db_base.DbTestCase): self.node.refresh() self.assertEqual('bar', self.node.instance_info['foo']) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_get_uuid_from_result(self, mock_statuses): + mock_statuses.return_value = [ + {'command_name': 'banana', 'command_result': None}, + {'command_name': 'prepare_image', + 'command_result': {'result': 'okay root_uuid=abcd'}}, + {'command_name': 'get_deploy_steps', + 'command_result': {'deploy_steps': []}} + ] + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + result = self.driver._get_uuid_from_result(task, 'root_uuid') + self.assertEqual('abcd', result) + + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_get_uuid_from_result_fails(self, mock_statuses): + mock_statuses.return_value = [ + {'command_name': 'banana', 'command_result': None}, + {'command_name': 'get_deploy_steps', + 'command_result': {'deploy_steps': []}} + ] + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + result = self.driver._get_uuid_from_result(task, 'root_uuid') + self.assertIsNone(result) + @mock.patch.object(manager_utils, 'restore_power_state_if_needed', autospec=True) @mock.patch.object(manager_utils, 'power_on_node_if_needed', diff --git a/releasenotes/notes/agent-uuid-5d86bc18849acda3.yaml b/releasenotes/notes/agent-uuid-5d86bc18849acda3.yaml new file mode 100644 index 0000000000..13c61d5222 --- /dev/null +++ b/releasenotes/notes/agent-uuid-5d86bc18849acda3.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes the deployment failure with Ussuri (and older) ramdisks that happens + when another IPA command runs after ``prepare_image``.