From cea8d74914db22cb8478d6382e281140dd071253 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Wed, 9 Jan 2019 12:41:24 +0100 Subject: [PATCH] Fix updating nodes with removed or broken drivers Currently when updating a node we try to acquire the lock on it using its old database record, including its old driver. This is not correct when updating a driver, since the current conductor may not have access to the old driver (and the old driver may no longer be enabled at all). Since we do not need task.driver at all when updating nodes, this change stops populating it in update_node. Change-Id: I510c3bfbd11b01fef05341be2bb04c6bd01bf8ac Story: #2004741 Task: #28812 --- ironic/conductor/manager.py | 5 +++++ ironic/conductor/task_manager.py | 11 +++++++++-- ironic/tests/unit/conductor/test_manager.py | 10 ++++++++++ .../tests/unit/conductor/test_task_manager.py | 18 ++++++++++++++++++ .../broken-driver-update-fc5303340080ef04.yaml | 5 +++++ 5 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/broken-driver-update-fc5303340080ef04.yaml diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 0c14309336..65e6dc2a92 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -219,7 +219,12 @@ class ConductorManager(base_manager.BaseConductorManager): driver_factory.check_and_update_node_interfaces(node_obj) + # NOTE(dtantsur): if we're updating the driver from an invalid value, + # loading the old driver may be impossible. Since we only need to + # update the node record in the database, skip loading the driver + # completely. with task_manager.acquire(context, node_id, shared=False, + load_driver=False, purpose='node update') as task: # Prevent instance_uuid overwriting if ('instance_uuid' in delta and node_obj.instance_uuid diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py index c519379a64..804d8e4a2c 100644 --- a/ironic/conductor/task_manager.py +++ b/ironic/conductor/task_manager.py @@ -170,7 +170,8 @@ class TaskManager(object): """ def __init__(self, context, node_id, shared=False, - purpose='unspecified action', retry=True): + purpose='unspecified action', retry=True, + load_driver=True): """Create a new TaskManager. Acquire a lock on a node. The lock can be either shared or @@ -184,6 +185,9 @@ class TaskManager(object): lock. Default: False. :param purpose: human-readable purpose to put to debug logs. :param retry: whether to retry locking if it fails. Default: True. + :param load_driver: whether to load the ``driver`` object. Set this to + False if loading the driver is undesired or + impossible. :raises: DriverNotFound :raises: InterfaceNotFoundInEntrypoint :raises: NodeNotFound @@ -229,7 +233,10 @@ class TaskManager(object): context, self.node.id) self.volume_targets = objects.VolumeTarget.list_by_node_id( context, self.node.id) - self.driver = driver_factory.build_driver_for_task(self) + if load_driver: + self.driver = driver_factory.build_driver_for_task(self) + else: + self.driver = None except Exception: with excutils.save_and_reraise_exception(): diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 0cfb7de9ec..f34ee34874 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -625,6 +625,16 @@ class UpdateNodeTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): node.refresh() self.assertEqual(existing_driver, node.driver) + def test_update_node_from_invalid_driver(self): + existing_driver = 'fake-hardware' + wrong_driver = 'wrong-driver' + node = obj_utils.create_test_node(self.context, driver=wrong_driver) + node.driver = existing_driver + result = self.service.update_node(self.context, node) + self.assertEqual(existing_driver, result.driver) + node.refresh() + self.assertEqual(existing_driver, node.driver) + UpdateInterfaces = namedtuple('UpdateInterfaces', ('old', 'new')) # NOTE(dtantsur): "old" interfaces here do not match the defaults, so that # we can test resetting them. diff --git a/ironic/tests/unit/conductor/test_task_manager.py b/ironic/tests/unit/conductor/test_task_manager.py index 7043fee5a0..1cb61441ff 100644 --- a/ironic/tests/unit/conductor/test_task_manager.py +++ b/ironic/tests/unit/conductor/test_task_manager.py @@ -79,6 +79,24 @@ class TaskManagerTestCase(db_base.DbTestCase): release_mock.assert_called_once_with(self.context, self.host, self.node.id) + def test_no_driver(self, get_voltgt_mock, get_volconn_mock, + get_portgroups_mock, get_ports_mock, + build_driver_mock, reserve_mock, release_mock, + node_get_mock): + reserve_mock.return_value = self.node + with task_manager.TaskManager(self.context, 'fake-node-id', + load_driver=False) as task: + self.assertEqual(self.context, task.context) + self.assertEqual(self.node, task.node) + self.assertEqual(get_ports_mock.return_value, task.ports) + self.assertEqual(get_portgroups_mock.return_value, task.portgroups) + self.assertEqual(get_volconn_mock.return_value, + task.volume_connectors) + self.assertEqual(get_voltgt_mock.return_value, task.volume_targets) + self.assertIsNone(task.driver) + self.assertFalse(task.shared) + self.assertFalse(build_driver_mock.called) + def test_excl_nested_acquire( self, get_voltgt_mock, get_volconn_mock, get_portgroups_mock, get_ports_mock, build_driver_mock, diff --git a/releasenotes/notes/broken-driver-update-fc5303340080ef04.yaml b/releasenotes/notes/broken-driver-update-fc5303340080ef04.yaml new file mode 100644 index 0000000000..057de4dd93 --- /dev/null +++ b/releasenotes/notes/broken-driver-update-fc5303340080ef04.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + A bug has been fixed in the node update code that could cause the nodes + to become not updatable if their driver is no longer available.