From 8e5337c39acdea1b82e367a798ea9a47c5a8d785 Mon Sep 17 00:00:00 2001
From: Dmitry Tantsur
Date: Fri, 4 Jan 2019 13:35:45 +0100
Subject: [PATCH] Allocation API: allow skipping retries in TaskManager

To reduce the number of times the API returns HTTP CONFLICT, we retry
acquiring an exclusive lock in TaskManager. However, this is not
desirable for the allocation API: we usually have several nodes to
work with, and another node can be picked without waiting for the
current one to become available.

The new "retry" argument to TaskManager allows disabling the retries.

Change-Id: Ia8267b1baa8b671a602e7fc402b7ef083e2ac2ed
Story: #2004341
Task: #28722
---
 ironic/conductor/task_manager.py              | 19 +++++++++-------
 .../tests/unit/conductor/test_task_manager.py | 22 +++++++++++++++++++
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py
index b32d33a71b..c519379a64 100644
--- a/ironic/conductor/task_manager.py
+++ b/ironic/conductor/task_manager.py
@@ -149,20 +149,16 @@ def require_exclusive_lock(f):
     return wrapper
 
 
-def acquire(context, node_id, shared=False, purpose='unspecified action'):
+def acquire(context, *args, **kwargs):
     """Shortcut for acquiring a lock on a Node.
 
     :param context: Request context.
-    :param node_id: ID or UUID of node to lock.
-    :param shared: Boolean indicating whether to take a shared or exclusive
-                   lock. Default: False.
-    :param purpose: human-readable purpose to put to debug logs.
     :returns: An instance of :class:`TaskManager`.
 
     """
     # NOTE(lintan): This is a workaround to set the context of periodic tasks.
     context.ensure_thread_contain_context()
-    return TaskManager(context, node_id, shared=shared, purpose=purpose)
+    return TaskManager(context, *args, **kwargs)
 
 
 class TaskManager(object):
@@ -174,7 +170,7 @@ class TaskManager(object):
     """
 
     def __init__(self, context, node_id, shared=False,
-                 purpose='unspecified action'):
+                 purpose='unspecified action', retry=True):
         """Create a new TaskManager.
 
         Acquire a lock on a node. The lock can be either shared or
@@ -187,6 +183,7 @@ class TaskManager(object):
         :param shared: Boolean indicating whether to take a shared or
                        exclusive lock. Default: False.
         :param purpose: human-readable purpose to put to debug logs.
+        :param retry: whether to retry locking if it fails. Default: True.
         :raises: DriverNotFound
         :raises: InterfaceNotFoundInEntrypoint
         :raises: NodeNotFound
@@ -201,6 +198,7 @@ class TaskManager(object):
         self._node = None
         self.node_id = node_id
         self.shared = shared
+        self._retry = retry
 
         self.fsm = states.machine.copy()
         self._purpose = purpose
@@ -251,12 +249,17 @@ class TaskManager(object):
     def _lock(self):
         self._debug_timer.restart()
 
+        if self._retry:
+            attempts = CONF.conductor.node_locked_retry_attempts
+        else:
+            attempts = 1
+
         # NodeLocked exceptions can be annoying. Let's try to alleviate
         # some of that pain by retrying our lock attempts. The retrying
         # module expects a wait_fixed value in milliseconds.
         @retrying.retry(
             retry_on_exception=lambda e: isinstance(e, exception.NodeLocked),
-            stop_max_attempt_number=CONF.conductor.node_locked_retry_attempts,
+            stop_max_attempt_number=attempts,
             wait_fixed=CONF.conductor.node_locked_retry_interval * 1000)
         def reserve_node():
             self.node = objects.Node.reserve(self.context, CONF.host,
diff --git a/ironic/tests/unit/conductor/test_task_manager.py b/ironic/tests/unit/conductor/test_task_manager.py
index 37255af50f..7043fee5a0 100644
--- a/ironic/tests/unit/conductor/test_task_manager.py
+++ b/ironic/tests/unit/conductor/test_task_manager.py
@@ -159,6 +159,28 @@ class TaskManagerTestCase(db_base.DbTestCase):
         reserve_mock.assert_has_calls(expected_calls)
         self.assertEqual(2, reserve_mock.call_count)
 
+    def test_excl_lock_exception_no_retries(
+            self, get_voltgt_mock, get_volconn_mock, get_portgroups_mock,
+            get_ports_mock, build_driver_mock,
+            reserve_mock, release_mock, node_get_mock):
+        retry_attempts = 3
+        self.config(node_locked_retry_attempts=retry_attempts,
+                    group='conductor')
+
+        # Fail the first lock attempt; with retry=False there is no second.
+        reserve_mock.side_effect = [exception.NodeLocked(node='foo',
+                                                         host='foo'),
+                                    self.node]
+
+        self.assertRaises(exception.NodeLocked,
+                          task_manager.TaskManager,
+                          self.context,
+                          'fake-node-id',
+                          retry=False)
+
+        reserve_mock.assert_called_once_with(self.context, self.host,
+                                             'fake-node-id')
+
     def test_excl_lock_reserve_exception(
             self, get_voltgt_mock, get_volconn_mock, get_portgroups_mock,
             get_ports_mock, build_driver_mock,
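
For illustration, a minimal caller-side sketch of what this change enables
(not part of the patch: the helper name pick_first_available and the purpose
string are made up; task_manager.acquire() and exception.NodeLocked are the
real APIs touched above). With retry=False a busy node fails fast with
NodeLocked instead of blocking for up to node_locked_retry_attempts *
node_locked_retry_interval seconds, so allocation-style code can move on to
the next candidate:

    from ironic.common import exception
    from ironic.conductor import task_manager

    # Hypothetical helper, not part of this patch.
    def pick_first_available(context, candidate_node_ids):
        """Lock and return a task for the first candidate that is free."""
        for node_id in candidate_node_ids:
            try:
                # retry=False: attempt the exclusive lock exactly once
                # instead of up to node_locked_retry_attempts times.
                return task_manager.acquire(context, node_id,
                                            purpose='allocating',
                                            retry=False)
            except exception.NodeLocked:
                continue  # reserved by another conductor; try the next node
        return None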