Remove too large configdrive for handling error
When configdrive is too large, a node object cannot be saved to DB. If it happens, the node cannot moved to DEPLOYFAIL because saving the node is prevented again by the large configdrive in the object. In this case, the node gets stuck in DEPLOYING, which doesn't allow any state transition. This patch removes the configdrive from a node object when storing the configdrive fails. This also catches ConfigInvalid exception, which is mentioned in the docsting, and any unexpected exception from _store_configdrive() to avoid getting a node stuck in DEPLOYING. Change-Id: I83cf3e02622fc3ed8f5b5389f533e374c1b985f3 Closes-Bug: 1745630
This commit is contained in:
parent
9ab04d0962
commit
927c487a0f
@ -50,6 +50,7 @@ import eventlet
|
||||
from futurist import periodics
|
||||
from futurist import waiters
|
||||
from ironic_lib import metrics_utils
|
||||
from oslo_db import exception as db_exception
|
||||
from oslo_log import log
|
||||
import oslo_messaging as messaging
|
||||
from oslo_utils import excutils
|
||||
@ -3171,6 +3172,7 @@ def _store_configdrive(node, configdrive):
|
||||
i_info = node.instance_info
|
||||
i_info['configdrive'] = configdrive
|
||||
node.instance_info = i_info
|
||||
node.save()
|
||||
|
||||
|
||||
@METRICS.timer('do_node_deploy')
|
||||
@ -3190,7 +3192,7 @@ def do_node_deploy(task, conductor_id, configdrive=None):
|
||||
try:
|
||||
if configdrive:
|
||||
_store_configdrive(node, configdrive)
|
||||
except exception.SwiftOperationError as e:
|
||||
except (exception.SwiftOperationError, exception.ConfigInvalid) as e:
|
||||
with excutils.save_and_reraise_exception():
|
||||
handle_failure(
|
||||
e, task,
|
||||
@ -3198,6 +3200,26 @@ def do_node_deploy(task, conductor_id, configdrive=None):
|
||||
'%(node)s to Swift'),
|
||||
_('Failed to upload the configdrive to Swift. '
|
||||
'Error: %s'))
|
||||
except db_exception.DBDataError as e:
|
||||
with excutils.save_and_reraise_exception():
|
||||
# NOTE(hshiina): This error happens when the configdrive is
|
||||
# too large. Remove the configdrive from the
|
||||
# object to update DB successfully in handling
|
||||
# the failure.
|
||||
node.obj_reset_changes()
|
||||
handle_failure(
|
||||
e, task,
|
||||
('Error while storing the configdrive for %(node)s into '
|
||||
'the database: %(err)s'),
|
||||
_("Failed to store the configdrive in the database. : %s"))
|
||||
except Exception as e:
|
||||
with excutils.save_and_reraise_exception():
|
||||
handle_failure(
|
||||
e, task,
|
||||
('Unexpected error while preparing the configdrive for '
|
||||
'node %(node)s'),
|
||||
_("Failed to prepare the configdrive. Exception: %s"),
|
||||
traceback=True)
|
||||
|
||||
try:
|
||||
task.driver.deploy.prepare(task)
|
||||
|
@ -24,6 +24,7 @@ import datetime
|
||||
import eventlet
|
||||
import mock
|
||||
from oslo_config import cfg
|
||||
from oslo_db import exception as db_exception
|
||||
import oslo_messaging as messaging
|
||||
from oslo_utils import uuidutils
|
||||
from oslo_versionedobjects import base as ovo_base
|
||||
@ -49,7 +50,6 @@ from ironic.drivers.modules.network import flat as n_flat
|
||||
from ironic import objects
|
||||
from ironic.objects import base as obj_base
|
||||
from ironic.objects import fields as obj_fields
|
||||
from ironic.tests import base as tests_base
|
||||
from ironic.tests.unit.conductor import mgr_utils
|
||||
from ironic.tests.unit.db import base as db_base
|
||||
from ironic.tests.unit.db import utils as db_utils
|
||||
@ -1590,6 +1590,59 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
self.assertIsNotNone(node.last_error)
|
||||
self.assertFalse(mock_deploy.called)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
|
||||
def test__do_node_deploy_configdrive_db_error(self, mock_deploy):
|
||||
self._start_service()
|
||||
node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
provision_state=states.DEPLOYING,
|
||||
target_provision_state=states.ACTIVE)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
task.node.save()
|
||||
expected_instance_info = dict(node.instance_info)
|
||||
with mock.patch.object(dbapi.IMPL, 'update_node') as mock_db:
|
||||
db_node = self.dbapi.get_node_by_uuid(node.uuid)
|
||||
mock_db.side_effect = [db_exception.DBDataError('DB error'),
|
||||
db_node, db_node]
|
||||
self.assertRaises(db_exception.DBDataError,
|
||||
manager.do_node_deploy, task,
|
||||
self.service.conductor.id,
|
||||
configdrive=b'fake config drive')
|
||||
expected_instance_info.update(configdrive=b'fake config drive')
|
||||
expected_calls = [
|
||||
mock.call(node.uuid,
|
||||
{'version': mock.ANY,
|
||||
'instance_info': expected_instance_info}),
|
||||
mock.call(node.uuid,
|
||||
{'version': mock.ANY,
|
||||
'provision_state': states.DEPLOYFAIL,
|
||||
'target_provision_state': states.ACTIVE}),
|
||||
mock.call(node.uuid,
|
||||
{'version': mock.ANY,
|
||||
'last_error': mock.ANY})]
|
||||
self.assertEqual(expected_calls, mock_db.mock_calls)
|
||||
self.assertFalse(mock_deploy.called)
|
||||
|
||||
@mock.patch.object(manager, '_store_configdrive')
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
|
||||
def test__do_node_deploy_configdrive_unexpected_error(self, mock_deploy,
|
||||
mock_store):
|
||||
self._start_service()
|
||||
node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
provision_state=states.DEPLOYING,
|
||||
target_provision_state=states.ACTIVE)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
|
||||
mock_store.side_effect = RuntimeError('unexpected')
|
||||
self.assertRaises(RuntimeError,
|
||||
manager.do_node_deploy, task,
|
||||
self.service.conductor.id,
|
||||
configdrive=b'fake config drive')
|
||||
node.refresh()
|
||||
self.assertEqual(states.DEPLOYFAIL, node.provision_state)
|
||||
self.assertEqual(states.ACTIVE, node.target_provision_state)
|
||||
self.assertIsNotNone(node.last_error)
|
||||
self.assertFalse(mock_deploy.called)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
|
||||
def test__do_node_deploy_ok_2(self, mock_deploy):
|
||||
# NOTE(rloo): a different way of testing for the same thing as in
|
||||
@ -5717,16 +5770,17 @@ class ManagerSyncLocalStateTestCase(mgr_utils.CommonMixIn, db_base.DbTestCase):
|
||||
|
||||
|
||||
@mock.patch.object(swift, 'SwiftAPI')
|
||||
class StoreConfigDriveTestCase(tests_base.TestCase):
|
||||
class StoreConfigDriveTestCase(db_base.DbTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(StoreConfigDriveTestCase, self).setUp()
|
||||
self.node = obj_utils.get_test_node(self.context, driver='fake',
|
||||
instance_info=None)
|
||||
self.node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
instance_info=None)
|
||||
|
||||
def test_store_configdrive(self, mock_swift):
|
||||
manager._store_configdrive(self.node, 'foo')
|
||||
expected_instance_info = {'configdrive': 'foo'}
|
||||
self.node.refresh()
|
||||
self.assertEqual(expected_instance_info, self.node.instance_info)
|
||||
self.assertFalse(mock_swift.called)
|
||||
|
||||
@ -5754,6 +5808,7 @@ class StoreConfigDriveTestCase(tests_base.TestCase):
|
||||
object_headers=expected_obj_header)
|
||||
mock_swift.return_value.get_temp_url.assert_called_once_with(
|
||||
container_name, expected_obj_name, timeout)
|
||||
self.node.refresh()
|
||||
self.assertEqual(expected_instance_info, self.node.instance_info)
|
||||
|
||||
|
||||
|
8
releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml
Normal file
8
releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml
Normal file
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes a bug to get a node stuck in ``deploying`` state when the size of
|
||||
the configdrive exceeds the limitation of the database. In MySQL, the
|
||||
limitation is about 64KiB. With this fix, the provision state gets
|
||||
``deploy failed`` in this case. See `bug 1745630
|
||||
<https://bugs.launchpad.net/ironic/+bug/1745630>`_ for details.
|
Loading…
Reference in New Issue
Block a user