Merge "Remove too large configdrive for handling error"

This commit is contained in:
Zuul 2018-02-15 12:02:09 +00:00 committed by Gerrit Code Review
commit f8ec81233f
3 changed files with 90 additions and 5 deletions

View File

@ -50,6 +50,7 @@ import eventlet
from futurist import periodics
from futurist import waiters
from ironic_lib import metrics_utils
from oslo_db import exception as db_exception
from oslo_log import log
import oslo_messaging as messaging
from oslo_utils import excutils
@ -3178,6 +3179,7 @@ def _store_configdrive(node, configdrive):
i_info = node.instance_info
i_info['configdrive'] = configdrive
node.instance_info = i_info
node.save()
@METRICS.timer('do_node_deploy')
@ -3197,7 +3199,7 @@ def do_node_deploy(task, conductor_id, configdrive=None):
try:
if configdrive:
_store_configdrive(node, configdrive)
except exception.SwiftOperationError as e:
except (exception.SwiftOperationError, exception.ConfigInvalid) as e:
with excutils.save_and_reraise_exception():
handle_failure(
e, task,
@ -3205,6 +3207,26 @@ def do_node_deploy(task, conductor_id, configdrive=None):
'%(node)s to Swift'),
_('Failed to upload the configdrive to Swift. '
'Error: %s'))
except db_exception.DBDataError as e:
with excutils.save_and_reraise_exception():
# NOTE(hshiina): This error happens when the configdrive is
# too large. Remove the configdrive from the
# object to update DB successfully in handling
# the failure.
node.obj_reset_changes()
handle_failure(
e, task,
('Error while storing the configdrive for %(node)s into '
'the database: %(err)s'),
_("Failed to store the configdrive in the database. : %s"))
except Exception as e:
with excutils.save_and_reraise_exception():
handle_failure(
e, task,
('Unexpected error while preparing the configdrive for '
'node %(node)s'),
_("Failed to prepare the configdrive. Exception: %s"),
traceback=True)
try:
task.driver.deploy.prepare(task)

View File

@ -24,6 +24,7 @@ import datetime
import eventlet
import mock
from oslo_config import cfg
from oslo_db import exception as db_exception
import oslo_messaging as messaging
from oslo_utils import uuidutils
from oslo_versionedobjects import base as ovo_base
@ -49,7 +50,6 @@ from ironic.drivers.modules.network import flat as n_flat
from ironic import objects
from ironic.objects import base as obj_base
from ironic.objects import fields as obj_fields
from ironic.tests import base as tests_base
from ironic.tests.unit.conductor import mgr_utils
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
@ -1590,6 +1590,59 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin,
self.assertIsNotNone(node.last_error)
self.assertFalse(mock_deploy.called)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test__do_node_deploy_configdrive_db_error(self, mock_deploy):
self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.DEPLOYING,
target_provision_state=states.ACTIVE)
task = task_manager.TaskManager(self.context, node.uuid)
task.node.save()
expected_instance_info = dict(node.instance_info)
with mock.patch.object(dbapi.IMPL, 'update_node') as mock_db:
db_node = self.dbapi.get_node_by_uuid(node.uuid)
mock_db.side_effect = [db_exception.DBDataError('DB error'),
db_node, db_node]
self.assertRaises(db_exception.DBDataError,
manager.do_node_deploy, task,
self.service.conductor.id,
configdrive=b'fake config drive')
expected_instance_info.update(configdrive=b'fake config drive')
expected_calls = [
mock.call(node.uuid,
{'version': mock.ANY,
'instance_info': expected_instance_info}),
mock.call(node.uuid,
{'version': mock.ANY,
'provision_state': states.DEPLOYFAIL,
'target_provision_state': states.ACTIVE}),
mock.call(node.uuid,
{'version': mock.ANY,
'last_error': mock.ANY})]
self.assertEqual(expected_calls, mock_db.mock_calls)
self.assertFalse(mock_deploy.called)
@mock.patch.object(manager, '_store_configdrive')
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test__do_node_deploy_configdrive_unexpected_error(self, mock_deploy,
mock_store):
self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.DEPLOYING,
target_provision_state=states.ACTIVE)
task = task_manager.TaskManager(self.context, node.uuid)
mock_store.side_effect = RuntimeError('unexpected')
self.assertRaises(RuntimeError,
manager.do_node_deploy, task,
self.service.conductor.id,
configdrive=b'fake config drive')
node.refresh()
self.assertEqual(states.DEPLOYFAIL, node.provision_state)
self.assertEqual(states.ACTIVE, node.target_provision_state)
self.assertIsNotNone(node.last_error)
self.assertFalse(mock_deploy.called)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test__do_node_deploy_ok_2(self, mock_deploy):
# NOTE(rloo): a different way of testing for the same thing as in
@ -5742,16 +5795,17 @@ class ManagerSyncLocalStateTestCase(mgr_utils.CommonMixIn, db_base.DbTestCase):
@mock.patch.object(swift, 'SwiftAPI')
class StoreConfigDriveTestCase(tests_base.TestCase):
class StoreConfigDriveTestCase(db_base.DbTestCase):
def setUp(self):
super(StoreConfigDriveTestCase, self).setUp()
self.node = obj_utils.get_test_node(self.context, driver='fake',
instance_info=None)
self.node = obj_utils.create_test_node(self.context, driver='fake',
instance_info=None)
def test_store_configdrive(self, mock_swift):
manager._store_configdrive(self.node, 'foo')
expected_instance_info = {'configdrive': 'foo'}
self.node.refresh()
self.assertEqual(expected_instance_info, self.node.instance_info)
self.assertFalse(mock_swift.called)
@ -5779,6 +5833,7 @@ class StoreConfigDriveTestCase(tests_base.TestCase):
object_headers=expected_obj_header)
mock_swift.return_value.get_temp_url.assert_called_once_with(
container_name, expected_obj_name, timeout)
self.node.refresh()
self.assertEqual(expected_instance_info, self.node.instance_info)

View File

@ -0,0 +1,8 @@
---
fixes:
- |
Fixes a bug to get a node stuck in ``deploying`` state when the size of
the configdrive exceeds the limitation of the database. In MySQL, the
limitation is about 64KiB. With this fix, the provision state gets
``deploy failed`` in this case. See `bug 1745630
<https://bugs.launchpad.net/ironic/+bug/1745630>`_ for details.