Refactor the L3 agent batch notifier

This patch is the first one of a series of patches improving how the L3
agents update the router HA state to the Neutron server.

This patch partially reverts the previous patch [1]. When the batch
notifier sends events, it calls the callback method passed during the
initialization, in this case AgentMixin.notify_server. The batch
notifier spawns a new thread in charge of sending the notifications and
then wait the specified "batch_interval" time. If the callback method is
not synchronous with the notify thread execution (what [1] implemented),
the thread can finish while the RPC client is still sending the
HA router states. If another HA state update is received, then both
updates can be executed at the same time. It is possible then that a new
router state can be overwritten with an old one still not sent or
processed.

The batch notifier is refactored, to improve what initally was
implemented [2] and then updated [3]. Currently, each new event thread
can update the "pending_events" list. Then, a new thread is spawned to
process this event list. This thread decouples the current execution
from the calling thread, making the event processing a non-blocking
process.

But with the current implementation, each new process will spawn a new
thread, synchronized with the previous and new ones (using a
synchronized decorator). That means, during the batch interval time, the
system can have as many threads waiting as new events received. Those
threads will end secuentially when the previous threads end the batch
interval sleep time.

Instead of this, this patch receives and enqueue each new event and
allows only one thread to be alive while processing the event list. If
at the end of the processing loop new events are stored, the thread will
process then.

[1] I3f555a0c78fbc02d8214f12b62c37d140bc71da1
[2] I2f8cf261f48bdb632ac0bd643a337290b5297fce
[3] I82f403441564955345f47877151e0c457712dd2f

Partial-Bug: #1837635

Change-Id: I20cfa1cf5281198079f5e0dbf195755abc919581
This commit is contained in:
Rodolfo Alonso Hernandez 2019-07-24 11:17:19 +00:00
parent 6d9283c1bc
commit 8b7d2c8a93
5 changed files with 72 additions and 56 deletions

View File

@ -184,9 +184,6 @@ class AgentMixin(object):
ri.disable_radvd() ri.disable_radvd()
def notify_server(self, batched_events): def notify_server(self, batched_events):
eventlet.spawn_n(self._notify_server, batched_events)
def _notify_server(self, batched_events):
translated_states = dict((router_id, TRANSLATION_MAP[state]) for translated_states = dict((router_id, TRANSLATION_MAP[state]) for
router_id, state in batched_events) router_id, state in batched_events)
LOG.debug('Updating server with HA routers states %s', LOG.debug('Updating server with HA routers states %s',

View File

@ -10,19 +10,19 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import threading
import eventlet import eventlet
from neutron_lib.utils import runtime
from oslo_utils import uuidutils
from neutron.common import utils from neutron.common import utils
class BatchNotifier(object): class BatchNotifier(object):
def __init__(self, batch_interval, callback): def __init__(self, batch_interval, callback):
self.pending_events = [] self._pending_events = eventlet.Queue()
self.callback = callback self.callback = callback
self.batch_interval = batch_interval self.batch_interval = batch_interval
self._lock_identifier = 'notifier-%s' % uuidutils.generate_uuid() self._mutex = threading.Lock()
def queue_event(self, event): def queue_event(self, event):
"""Called to queue sending an event with the next batch of events. """Called to queue sending an event with the next batch of events.
@ -35,32 +35,35 @@ class BatchNotifier(object):
This replaces the loopingcall with a mechanism that creates a This replaces the loopingcall with a mechanism that creates a
short-lived thread on demand whenever an event is queued. That thread short-lived thread on demand whenever an event is queued. That thread
will wait for a lock, send all queued events and then sleep for will check if the lock is released, send all queued events and then
'batch_interval' seconds to allow other events to queue up. sleep for 'batch_interval' seconds. If at the end of this sleep time,
other threads have added new events to the event queue, the same thread
will process them.
This effectively acts as a rate limiter to only allow 1 batch per At the same time, other threads will be able to add new events to the
'batch_interval' seconds. queue and will spawn new "synced_send" threads to process them. But if
the mutex is locked, the spawned thread will end immediately.
:param event: the event that occurred. :param event: the event that occurred.
""" """
if not event: if not event:
return return
self.pending_events.append(event) self._pending_events.put(event)
@runtime.synchronized(self._lock_identifier)
def synced_send(): def synced_send():
self._notify() if not self._mutex.locked():
# sleeping after send while holding the lock allows subsequent with self._mutex:
# events to batch up while not self._pending_events.empty():
eventlet.sleep(self.batch_interval) self._notify()
# sleeping after send while holding the lock allows
# subsequent events to batch up
eventlet.sleep(self.batch_interval)
utils.spawn_n(synced_send) utils.spawn_n(synced_send)
def _notify(self): def _notify(self):
if not self.pending_events: batched_events = []
return while not self._pending_events.empty():
batched_events.append(self._pending_events.get())
batched_events = self.pending_events
self.pending_events = []
self.callback(batched_events) self.callback(batched_events)

View File

@ -16,6 +16,7 @@
import eventlet import eventlet
import mock import mock
from neutron.common import utils
from neutron.notifiers import batch_notifier from neutron.notifiers import batch_notifier
from neutron.tests import base from neutron.tests import base
@ -23,41 +24,54 @@ from neutron.tests import base
class TestBatchNotifier(base.BaseTestCase): class TestBatchNotifier(base.BaseTestCase):
def setUp(self): def setUp(self):
super(TestBatchNotifier, self).setUp() super(TestBatchNotifier, self).setUp()
self.notifier = batch_notifier.BatchNotifier(0.1, lambda x: x) self._received_events = eventlet.Queue()
self.spawn_n_p = mock.patch('eventlet.spawn_n') self.notifier = batch_notifier.BatchNotifier(2, self._queue_events)
self.spawn_n = self.spawn_n_p.start() self.spawn_n_p = mock.patch.object(eventlet, 'spawn_n')
def _queue_events(self, events):
for event in events:
self._received_events.put(event)
def test_queue_event_no_event(self): def test_queue_event_no_event(self):
spawn_n = self.spawn_n_p.start()
self.notifier.queue_event(None) self.notifier.queue_event(None)
self.assertEqual(0, len(self.notifier.pending_events)) self.assertEqual(0, len(self.notifier._pending_events.queue))
self.assertEqual(0, self.spawn_n.call_count) self.assertEqual(0, spawn_n.call_count)
def test_queue_event_first_event(self): def test_queue_event_first_event(self):
spawn_n = self.spawn_n_p.start()
self.notifier.queue_event(mock.Mock()) self.notifier.queue_event(mock.Mock())
self.assertEqual(1, len(self.notifier.pending_events)) self.assertEqual(1, len(self.notifier._pending_events.queue))
self.assertEqual(1, self.spawn_n.call_count) self.assertEqual(1, spawn_n.call_count)
def test_queue_event_multiple_events(self): def test_queue_event_multiple_events_notify_method(self):
self.spawn_n_p.stop() def _batch_notifier_dequeue():
c_mock = mock.patch.object(self.notifier, 'callback').start() while not self.notifier._pending_events.empty():
events = 6 self.notifier._pending_events.get()
for i in range(0, events):
self.notifier.queue_event(mock.Mock()) c_mock = mock.patch.object(self.notifier, '_notify',
side_effect=_batch_notifier_dequeue).start()
events = 20
for i in range(events):
self.notifier.queue_event('Event %s' % i)
eventlet.sleep(0) # yield to let coro execute eventlet.sleep(0) # yield to let coro execute
while self.notifier.pending_events: utils.wait_until_true(self.notifier._pending_events.empty,
# wait for coroutines to finish timeout=5)
eventlet.sleep(0.1) # Called twice: when the first thread calls "synced_send" and then,
# in the same loop, when self._pending_events is not empty(). All
# self.notifier.queue_event calls are done in just one
# "batch_interval" (2 secs).
self.assertEqual(2, c_mock.call_count) self.assertEqual(2, c_mock.call_count)
self.assertEqual(6, sum(len(c[0][0]) for c in c_mock.call_args_list))
self.assertEqual(0, len(self.notifier.pending_events))
def test_queue_event_call_send_events(self): def test_queue_event_multiple_events_callback_method(self):
with mock.patch.object(self.notifier, events = 20
'callback') as send_events: for i in range(events):
self.spawn_n.side_effect = lambda func: func() self.notifier.queue_event('Event %s' % i)
self.notifier.queue_event(mock.Mock()) eventlet.sleep(0) # yield to let coro execute
while self.notifier.pending_events:
# wait for coroutines to finish utils.wait_until_true(self.notifier._pending_events.empty,
eventlet.sleep(0.1) timeout=5)
self.assertTrue(send_events.called) expected = ['Event %s' % i for i in range(events)]
# Check the events have been handled in the same input order.
self.assertEqual(expected, list(self._received_events.queue))

View File

@ -172,7 +172,7 @@ class TestIronicNotifier(base.BaseTestCase):
original_port=original_port, port=port, **{}) original_port=original_port, port=port, **{})
self.assertEqual( self.assertEqual(
2, len(self.ironic_notifier.batch_notifier.pending_events)) 2, len(self.ironic_notifier.batch_notifier._pending_events.queue))
self.assertEqual(2, mock_spawn_n.call_count) self.assertEqual(2, mock_spawn_n.call_count)
@mock.patch.object(client, 'Client', autospec=False) @mock.patch.object(client, 'Client', autospec=False)

View File

@ -293,7 +293,7 @@ class TestNovaNotify(base.BaseTestCase):
self.nova_notifier.send_network_change( self.nova_notifier.send_network_change(
'update_floatingip', original_obj, returned_obj) 'update_floatingip', original_obj, returned_obj)
self.assertEqual( self.assertEqual(
2, len(self.nova_notifier.batch_notifier.pending_events)) 2, len(self.nova_notifier.batch_notifier._pending_events.queue))
returned_obj_non = {'floatingip': {'port_id': None}} returned_obj_non = {'floatingip': {'port_id': None}}
event_dis = self.nova_notifier.create_port_changed_event( event_dis = self.nova_notifier.create_port_changed_event(
@ -301,9 +301,10 @@ class TestNovaNotify(base.BaseTestCase):
event_assoc = self.nova_notifier.create_port_changed_event( event_assoc = self.nova_notifier.create_port_changed_event(
'update_floatingip', original_obj, returned_obj) 'update_floatingip', original_obj, returned_obj)
self.assertEqual( self.assertEqual(
self.nova_notifier.batch_notifier.pending_events[0], event_dis) self.nova_notifier.batch_notifier._pending_events.get(), event_dis)
self.assertEqual( self.assertEqual(
self.nova_notifier.batch_notifier.pending_events[1], event_assoc) self.nova_notifier.batch_notifier._pending_events.get(),
event_assoc)
def test_delete_port_notify(self): def test_delete_port_notify(self):
device_id = '32102d7b-1cf4-404d-b50a-97aae1f55f87' device_id = '32102d7b-1cf4-404d-b50a-97aae1f55f87'
@ -364,6 +365,7 @@ class TestNovaNotify(base.BaseTestCase):
self.nova_notifier.notify_port_active_direct(port) self.nova_notifier.notify_port_active_direct(port)
self.assertEqual( self.assertEqual(
1, len(self.nova_notifier.batch_notifier.pending_events)) 1, len(self.nova_notifier.batch_notifier._pending_events.queue))
self.assertEqual(expected_event, self.assertEqual(
self.nova_notifier.batch_notifier.pending_events[0]) expected_event,
self.nova_notifier.batch_notifier._pending_events.get())