From 927c487a0f822ab789f38be5f895fe061e5e71cb Mon Sep 17 00:00:00 2001 From: Hironori Shiina Date: Thu, 8 Feb 2018 18:03:09 +0900 Subject: [PATCH] Remove too large configdrive for handling error When configdrive is too large, a node object cannot be saved to DB. If it happens, the node cannot moved to DEPLOYFAIL because saving the node is prevented again by the large configdrive in the object. In this case, the node gets stuck in DEPLOYING, which doesn't allow any state transition. This patch removes the configdrive from a node object when storing the configdrive fails. This also catches ConfigInvalid exception, which is mentioned in the docsting, and any unexpected exception from _store_configdrive() to avoid getting a node stuck in DEPLOYING. Change-Id: I83cf3e02622fc3ed8f5b5389f533e374c1b985f3 Closes-Bug: 1745630 --- ironic/conductor/manager.py | 24 ++++++- ironic/tests/unit/conductor/test_manager.py | 63 +++++++++++++++++-- .../notes/bug-1745630-d28c8de54cebd329.yaml | 8 +++ 3 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 6d57c4adf2..7ee1bc2c14 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -50,6 +50,7 @@ import eventlet from futurist import periodics from futurist import waiters from ironic_lib import metrics_utils +from oslo_db import exception as db_exception from oslo_log import log import oslo_messaging as messaging from oslo_utils import excutils @@ -3171,6 +3172,7 @@ def _store_configdrive(node, configdrive): i_info = node.instance_info i_info['configdrive'] = configdrive node.instance_info = i_info + node.save() @METRICS.timer('do_node_deploy') @@ -3190,7 +3192,7 @@ def do_node_deploy(task, conductor_id, configdrive=None): try: if configdrive: _store_configdrive(node, configdrive) - except exception.SwiftOperationError as e: + except (exception.SwiftOperationError, exception.ConfigInvalid) as e: with excutils.save_and_reraise_exception(): handle_failure( e, task, @@ -3198,6 +3200,26 @@ def do_node_deploy(task, conductor_id, configdrive=None): '%(node)s to Swift'), _('Failed to upload the configdrive to Swift. ' 'Error: %s')) + except db_exception.DBDataError as e: + with excutils.save_and_reraise_exception(): + # NOTE(hshiina): This error happens when the configdrive is + # too large. Remove the configdrive from the + # object to update DB successfully in handling + # the failure. + node.obj_reset_changes() + handle_failure( + e, task, + ('Error while storing the configdrive for %(node)s into ' + 'the database: %(err)s'), + _("Failed to store the configdrive in the database. : %s")) + except Exception as e: + with excutils.save_and_reraise_exception(): + handle_failure( + e, task, + ('Unexpected error while preparing the configdrive for ' + 'node %(node)s'), + _("Failed to prepare the configdrive. Exception: %s"), + traceback=True) try: task.driver.deploy.prepare(task) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index c183562be8..41de33b00b 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -24,6 +24,7 @@ import datetime import eventlet import mock from oslo_config import cfg +from oslo_db import exception as db_exception import oslo_messaging as messaging from oslo_utils import uuidutils from oslo_versionedobjects import base as ovo_base @@ -49,7 +50,6 @@ from ironic.drivers.modules.network import flat as n_flat from ironic import objects from ironic.objects import base as obj_base from ironic.objects import fields as obj_fields -from ironic.tests import base as tests_base from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -1590,6 +1590,59 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin, self.assertIsNotNone(node.last_error) self.assertFalse(mock_deploy.called) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') + def test__do_node_deploy_configdrive_db_error(self, mock_deploy): + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.DEPLOYING, + target_provision_state=states.ACTIVE) + task = task_manager.TaskManager(self.context, node.uuid) + task.node.save() + expected_instance_info = dict(node.instance_info) + with mock.patch.object(dbapi.IMPL, 'update_node') as mock_db: + db_node = self.dbapi.get_node_by_uuid(node.uuid) + mock_db.side_effect = [db_exception.DBDataError('DB error'), + db_node, db_node] + self.assertRaises(db_exception.DBDataError, + manager.do_node_deploy, task, + self.service.conductor.id, + configdrive=b'fake config drive') + expected_instance_info.update(configdrive=b'fake config drive') + expected_calls = [ + mock.call(node.uuid, + {'version': mock.ANY, + 'instance_info': expected_instance_info}), + mock.call(node.uuid, + {'version': mock.ANY, + 'provision_state': states.DEPLOYFAIL, + 'target_provision_state': states.ACTIVE}), + mock.call(node.uuid, + {'version': mock.ANY, + 'last_error': mock.ANY})] + self.assertEqual(expected_calls, mock_db.mock_calls) + self.assertFalse(mock_deploy.called) + + @mock.patch.object(manager, '_store_configdrive') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') + def test__do_node_deploy_configdrive_unexpected_error(self, mock_deploy, + mock_store): + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.DEPLOYING, + target_provision_state=states.ACTIVE) + task = task_manager.TaskManager(self.context, node.uuid) + + mock_store.side_effect = RuntimeError('unexpected') + self.assertRaises(RuntimeError, + manager.do_node_deploy, task, + self.service.conductor.id, + configdrive=b'fake config drive') + node.refresh() + self.assertEqual(states.DEPLOYFAIL, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + self.assertIsNotNone(node.last_error) + self.assertFalse(mock_deploy.called) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') def test__do_node_deploy_ok_2(self, mock_deploy): # NOTE(rloo): a different way of testing for the same thing as in @@ -5717,16 +5770,17 @@ class ManagerSyncLocalStateTestCase(mgr_utils.CommonMixIn, db_base.DbTestCase): @mock.patch.object(swift, 'SwiftAPI') -class StoreConfigDriveTestCase(tests_base.TestCase): +class StoreConfigDriveTestCase(db_base.DbTestCase): def setUp(self): super(StoreConfigDriveTestCase, self).setUp() - self.node = obj_utils.get_test_node(self.context, driver='fake', - instance_info=None) + self.node = obj_utils.create_test_node(self.context, driver='fake', + instance_info=None) def test_store_configdrive(self, mock_swift): manager._store_configdrive(self.node, 'foo') expected_instance_info = {'configdrive': 'foo'} + self.node.refresh() self.assertEqual(expected_instance_info, self.node.instance_info) self.assertFalse(mock_swift.called) @@ -5754,6 +5808,7 @@ class StoreConfigDriveTestCase(tests_base.TestCase): object_headers=expected_obj_header) mock_swift.return_value.get_temp_url.assert_called_once_with( container_name, expected_obj_name, timeout) + self.node.refresh() self.assertEqual(expected_instance_info, self.node.instance_info) diff --git a/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml b/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml new file mode 100644 index 0000000000..5fafb4be52 --- /dev/null +++ b/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes a bug to get a node stuck in ``deploying`` state when the size of + the configdrive exceeds the limitation of the database. In MySQL, the + limitation is about 64KiB. With this fix, the provision state gets + ``deploy failed`` in this case. See `bug 1745630 + `_ for details.