From ee62bc1621f6c26ad64c258cfb15ad3f6a80f149 Mon Sep 17 00:00:00 2001 From: Pavlo Shchelokovskyy Date: Thu, 13 Apr 2017 09:26:33 +0000 Subject: [PATCH] Optimize node locking on heartbeat Previously we uncoditionally tried to grab an exclusive lock on a node on each heartbeat independent on whether any other action would have to be performed. This patch optimizes that so the lock is upgraded only when node is in a provision state that might require some further action (currently those are DEPLOYWAIT and CLEANWAIT). Also, handling of deprecated 'agent_last_heartbeat' field in driver_internal_info is removed. Change-Id: If18b871aed63a9e5b41ccb9828ef8fa428f5ab48 --- ironic/drivers/modules/agent_base_vendor.py | 22 ++-- .../tests/unit/drivers/modules/test_agent.py | 6 -- .../drivers/modules/test_agent_base_vendor.py | 102 +++++++++--------- .../unit/drivers/modules/test_iscsi_deploy.py | 6 -- 4 files changed, 65 insertions(+), 71 deletions(-) diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index 09f6e9a53d..6c95498ee8 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -258,6 +258,11 @@ class HeartbeatMixin(object): """ + @property + def heartbeat_allowed_states(self): + """Define node states where heartbeating is allowed""" + return (states.DEPLOYWAIT, states.CLEANWAIT) + @METRICS.timer('HeartbeatMixin.heartbeat') def heartbeat(self, task, callback_url): """Process a heartbeat. @@ -265,8 +270,14 @@ class HeartbeatMixin(object): :param task: task to work with. :param callback_url: agent HTTP API URL. """ - # TODO(dtantsur): upgrade lock only if we actually take action other - # than updating the last timestamp. + # NOTE(pas-ha) immediately skip the rest if nothing to do + if task.node.provision_state not in self.heartbeat_allowed_states: + LOG.debug('Heartbeat from node %(node)s in unsupported ' + 'provision state %(state), not taking any action.', + {'node': task.node.uuid, + 'state': task.node.provision_state}) + return + task.upgrade_lock() node = task.node @@ -274,13 +285,6 @@ class HeartbeatMixin(object): driver_internal_info = node.driver_internal_info driver_internal_info['agent_url'] = callback_url - - # TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't - # being used so remove that entry if it exists. - # Hopefully all nodes will have been updated by Pike, so - # we can delete this code then. - driver_internal_info.pop('agent_last_heartbeat', None) - node.driver_internal_info = driver_internal_info node.save() diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index 101f8f9d05..4f1f0f944a 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -552,12 +552,6 @@ class TestAgentDeploy(db_base.DbTestCase): tear_down_cleaning_mock.assert_called_once_with( task, manage_boot=False) - def test_heartbeat(self): - with task_manager.acquire(self.context, self.node.uuid, - shared=True) as task: - self.driver.heartbeat(task, 'url') - self.assertFalse(task.shared) - def _test_continue_deploy(self, additional_driver_info=None, additional_expected_image_info=None): self.node.provision_state = states.DEPLOYWAIT diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index 3e1017da92..a043ab6683 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -64,6 +64,53 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): super(HeartbeatMixinTest, self).setUp() self.deploy = agent_base_vendor.HeartbeatMixin() + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy', + autospec=True) + @mock.patch.object(agent_base_vendor.HeartbeatMixin, + 'reboot_to_instance', autospec=True) + @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', + autospec=True) + def test_heartbeat_in_maintenance(self, ncrc_mock, rti_mock, cd_mock): + # NOTE(pas-ha) checking only for states that are not noop + for state in (states.DEPLOYWAIT, states.CLEANWAIT): + for m in (ncrc_mock, rti_mock, cd_mock): + m.reset_mock() + self.node.provision_state = state + self.node.maintenance = True + self.node.save() + agent_url = 'url-%s' % state + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + self.deploy.heartbeat(task, agent_url) + self.assertFalse(task.shared) + self.assertEqual( + agent_url, + task.node.driver_internal_info['agent_url']) + self.assertEqual(0, ncrc_mock.call_count) + self.assertEqual(0, rti_mock.call_count) + self.assertEqual(0, cd_mock.call_count) + + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy', + autospec=True) + @mock.patch.object(agent_base_vendor.HeartbeatMixin, + 'reboot_to_instance', autospec=True) + @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', + autospec=True) + def test_heartbeat_noops_in_wrong_state(self, ncrc_mock, rti_mock, + cd_mock): + allowed = {states.DEPLOYWAIT, states.CLEANWAIT} + for state in set(states.machine.states) - allowed: + for m in (ncrc_mock, rti_mock, cd_mock): + m.reset_mock() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + self.node.provision_state = state + self.deploy.heartbeat(task, 'url') + self.assertTrue(task.shared) + self.assertEqual(0, ncrc_mock.call_count) + self.assertEqual(0, rti_mock.call_count) + self.assertEqual(0, cd_mock.call_count) + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_has_started', autospec=True) @mock.patch.object(deploy_utils, 'set_failed_state', autospec=True) @@ -215,58 +262,12 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): mock_continue.assert_called_once_with(mock.ANY, task) mock_handler.assert_called_once_with(task, mock.ANY) - @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy', - autospec=True) - @mock.patch.object(agent_base_vendor.HeartbeatMixin, - 'reboot_to_instance', autospec=True) - @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', - autospec=True) - def test_heartbeat_no_agent_last_heartbeat(self, ncrc_mock, rti_mock, - cd_mock): - """node.driver_internal_info doesn't have 'agent_last_heartbeat'.""" - node = self.node - node.maintenance = True - node.provision_state = states.AVAILABLE - driver_internal_info = {'agent_last_heartbeat': 'time'} - node.driver_internal_info = driver_internal_info - node.save() - with task_manager.acquire( - self.context, node['uuid'], shared=False) as task: - self.deploy.heartbeat(task, 'http://127.0.0.1:8080') - - self.assertEqual(0, ncrc_mock.call_count) - self.assertEqual(0, rti_mock.call_count) - self.assertEqual(0, cd_mock.call_count) - node.refresh() - self.assertNotIn('agent_last_heartbeat', node.driver_internal_info) - - @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy', - autospec=True) - @mock.patch.object(agent_base_vendor.HeartbeatMixin, - 'reboot_to_instance', autospec=True) - @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', - autospec=True) - def test_heartbeat_noops_maintenance_mode(self, ncrc_mock, rti_mock, - cd_mock): - """Ensures that heartbeat() no-ops for a maintenance node.""" - self.node.maintenance = True - for state in (states.AVAILABLE, states.DEPLOYWAIT, states.DEPLOYING, - states.CLEANING): - self.node.provision_state = state - self.node.save() - with task_manager.acquire( - self.context, self.node['uuid'], shared=False) as task: - self.deploy.heartbeat(task, 'http://127.0.0.1:8080') - - self.assertEqual(0, ncrc_mock.call_count) - self.assertEqual(0, rti_mock.call_count) - self.assertEqual(0, cd_mock.call_count) - @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_has_started', autospec=True) - def test_heartbeat_touch_provisioning(self, mock_deploy_started, - mock_touch): + def test_heartbeat_touch_provisioning_and_url_save(self, + mock_deploy_started, + mock_touch): mock_deploy_started.return_value = True self.node.provision_state = states.DEPLOYWAIT @@ -274,7 +275,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: self.deploy.heartbeat(task, 'http://127.0.0.1:8080') - + self.assertEqual('http://127.0.0.1:8080', + task.node.driver_internal_info['agent_url']) mock_touch.assert_called_once_with(mock.ANY) diff --git a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py index 9e9489c02e..d7200fc071 100644 --- a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py +++ b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py @@ -786,12 +786,6 @@ class ISCSIDeployTestCase(db_base.DbTestCase): agent_execute_clean_step_mock.assert_called_once_with( task, {'some-step': 'step-info'}) - def test_heartbeat(self): - with task_manager.acquire(self.context, self.node.uuid, - shared=True) as task: - self.driver.deploy.heartbeat(task, 'url') - self.assertFalse(task.shared) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, 'reboot_and_finish_deploy', autospec=True) @mock.patch.object(iscsi_deploy, 'do_agent_iscsi_deploy', autospec=True)