Event driven periodic resync task for DHCP agents
The DHCP agent will resync its state with Neutron to recover from any transient notification or RPC errors. Currently, the periodic resync task waits on a timer to determine whether a re-sync is necessary. The interval between attempts by default is 5 seconds and can be longer thru config. This may cause a potentially long delay before an agent gets new work via an agent_updated RPC call. The idea of this RFE is to change the timer based periodic resync task into an event driven one. It also proposes a new DHCP agent config option "resync_throttle" to ensure the minimum interval taken between resync state events to avoid too frequent resyncing. In this way, we could force the agent to act on the resync request immediately therefore decreasing how much time is needed before DHCP services are available. Co-authored-by: Allain Legacy <Allain.legacy@windriver.com> Closes-Bug: #1780370 Change-Id: Ie9d758ba5f750a38dc19ea5ce8b2c6b414f9ef80
This commit is contained in:
parent
b5fbac8ec8
commit
1d98f0a7d4
@ -15,6 +15,7 @@
|
||||
|
||||
import collections
|
||||
import os
|
||||
import threading
|
||||
|
||||
import eventlet
|
||||
from neutron_lib.agent import constants as agent_consts
|
||||
@ -86,6 +87,15 @@ class DhcpAgent(manager.Manager):
|
||||
self.needs_resync_reasons = collections.defaultdict(list)
|
||||
self.dhcp_ready_ports = set()
|
||||
self.conf = conf or cfg.CONF
|
||||
# If 'resync_throttle' is configured more than 'resync_interval' by
|
||||
# mistake, raise exception and log with message.
|
||||
if self.conf.resync_throttle > self.conf.resync_interval:
|
||||
msg = _("DHCP agent must have resync_throttle <= resync_interval")
|
||||
LOG.exception(msg)
|
||||
raise exceptions.InvalidConfigurationOption(
|
||||
opt_name='resync_throttle',
|
||||
opt_value=self.conf.resync_throttle)
|
||||
self._periodic_resync_event = threading.Event()
|
||||
self.cache = NetworkCache()
|
||||
self.dhcp_driver_cls = importutils.import_class(self.conf.dhcp_driver)
|
||||
self.plugin_rpc = DhcpPluginApi(topics.PLUGIN, self.conf.host)
|
||||
@ -174,6 +184,12 @@ class DhcpAgent(manager.Manager):
|
||||
specified, resync all networks.
|
||||
"""
|
||||
self.needs_resync_reasons[network_id].append(reason)
|
||||
self._periodic_resync_event.set()
|
||||
# Yield to allow other threads that may be ready to run.
|
||||
# This helps prevent one thread from acquiring the same lock over and
|
||||
# over again, in which case no other threads waiting on the
|
||||
# "dhcp-agent" lock would make any progress.
|
||||
eventlet.greenthread.sleep(0)
|
||||
|
||||
@_sync_lock
|
||||
def sync_state(self, networks=None):
|
||||
@ -243,9 +259,20 @@ class DhcpAgent(manager.Manager):
|
||||
|
||||
@utils.exception_logger()
|
||||
def _periodic_resync_helper(self):
|
||||
"""Resync the dhcp state at the configured interval."""
|
||||
"""Resync the dhcp state at the configured interval and throttle."""
|
||||
while True:
|
||||
eventlet.sleep(self.conf.resync_interval)
|
||||
# threading.Event.wait blocks until the internal flag is true. It
|
||||
# returns the internal flag on exit, so it will always return True
|
||||
# except if a timeout is given and the operation times out.
|
||||
if self._periodic_resync_event.wait(self.conf.resync_interval):
|
||||
LOG.debug("Resync event has been scheduled")
|
||||
clear_periodic_resync_event = self._periodic_resync_event.clear
|
||||
# configure throttler for clear_periodic_resync_event to
|
||||
# introduce delays between resync state events.
|
||||
throttled_clear_periodic_resync_event = utils.throttler(
|
||||
self.conf.resync_throttle)(clear_periodic_resync_event)
|
||||
throttled_clear_periodic_resync_event()
|
||||
|
||||
if self.needs_resync_reasons:
|
||||
# be careful to avoid a race with additions to list
|
||||
# from other threads
|
||||
|
@ -24,7 +24,17 @@ DHCP_AGENT_OPTS = [
|
||||
cfg.IntOpt('resync_interval', default=5,
|
||||
help=_("The DHCP agent will resync its state with Neutron to "
|
||||
"recover from any transient notification or RPC errors. "
|
||||
"The interval is number of seconds between attempts.")),
|
||||
"The interval is maximum number of seconds between "
|
||||
"attempts. The resync can be done more often based on "
|
||||
"the events triggered.")),
|
||||
cfg.IntOpt('resync_throttle', default=1,
|
||||
help=_("Throttle the number of resync state events between the "
|
||||
"local DHCP state and Neutron to only once per "
|
||||
"'resync_throttle' seconds. The value of throttle "
|
||||
"introduces a minimum interval between resync state "
|
||||
"events. Otherwise the resync may end up in a "
|
||||
"busy-loop. The value must be less than "
|
||||
"resync_interval.")),
|
||||
cfg.StrOpt('dhcp_driver',
|
||||
default='neutron.agent.linux.dhcp.Dnsmasq',
|
||||
help=_("The driver used to manage the DHCP server.")),
|
||||
|
@ -242,6 +242,26 @@ class TestDhcpAgent(base.BaseTestCase):
|
||||
"IPWrapper")
|
||||
self.mock_ip_wrapper = self.mock_ip_wrapper_p.start()
|
||||
|
||||
def test_init_resync_throttle_conf(self):
|
||||
try:
|
||||
dhcp_agent.DhcpAgent(HOSTNAME)
|
||||
except exceptions.InvalidConfigurationOption:
|
||||
self.fail("DHCP agent initialization unexpectedly raised an "
|
||||
"InvalidConfigurationOption exception. No exception is "
|
||||
"expected with the default configurations.")
|
||||
|
||||
# default resync_interval = 5; default resync_throttle = 1
|
||||
cfg.CONF.set_override('resync_throttle', 10)
|
||||
# resync_throttle must be <= resync_interval, otherwise an
|
||||
# InvalidConfigurationOption exception would be raised with log
|
||||
# message.
|
||||
with mock.patch.object(dhcp_agent.LOG, 'exception') as log:
|
||||
with testtools.ExpectedException(
|
||||
exceptions.InvalidConfigurationOption):
|
||||
dhcp_agent.DhcpAgent(HOSTNAME)
|
||||
log.assert_any_call("DHCP agent must have resync_throttle <= "
|
||||
"resync_interval")
|
||||
|
||||
def test_init_host(self):
|
||||
dhcp = dhcp_agent.DhcpAgent(HOSTNAME)
|
||||
with mock.patch.object(dhcp, 'sync_state') as sync_state:
|
||||
@ -489,18 +509,30 @@ class TestDhcpAgent(base.BaseTestCase):
|
||||
['Agent has just been revived'])
|
||||
|
||||
def test_periodic_resync_helper(self):
|
||||
with mock.patch.object(dhcp_agent.eventlet, 'sleep') as sleep:
|
||||
dhcp = dhcp_agent.DhcpAgent(HOSTNAME)
|
||||
resync_reasons = collections.OrderedDict(
|
||||
(('a', 'reason1'), ('b', 'reason2')))
|
||||
dhcp.needs_resync_reasons = resync_reasons
|
||||
with mock.patch.object(dhcp, 'sync_state') as sync_state:
|
||||
sync_state.side_effect = RuntimeError
|
||||
with testtools.ExpectedException(RuntimeError):
|
||||
dhcp._periodic_resync_helper()
|
||||
sync_state.assert_called_once_with(resync_reasons.keys())
|
||||
self.assertEqual(0, len(dhcp.needs_resync_reasons))
|
||||
|
||||
def test_periodic_resync_helper_with_event(self):
|
||||
with mock.patch.object(dhcp_agent.LOG, 'debug') as log:
|
||||
dhcp = dhcp_agent.DhcpAgent(HOSTNAME)
|
||||
resync_reasons = collections.OrderedDict(
|
||||
(('a', 'reason1'), ('b', 'reason2')))
|
||||
dhcp.needs_resync_reasons = resync_reasons
|
||||
dhcp.schedule_resync('reason1', 'a')
|
||||
dhcp.schedule_resync('reason1', 'b')
|
||||
reasons = dhcp.needs_resync_reasons.keys()
|
||||
with mock.patch.object(dhcp, 'sync_state') as sync_state:
|
||||
sync_state.side_effect = RuntimeError
|
||||
with testtools.ExpectedException(RuntimeError):
|
||||
dhcp._periodic_resync_helper()
|
||||
sync_state.assert_called_once_with(resync_reasons.keys())
|
||||
sleep.assert_called_once_with(dhcp.conf.resync_interval)
|
||||
self.assertEqual(0, len(dhcp.needs_resync_reasons))
|
||||
log.assert_any_call("Resync event has been scheduled")
|
||||
sync_state.assert_called_once_with(reasons)
|
||||
self.assertEqual(0, len(dhcp.needs_resync_reasons))
|
||||
|
||||
def test_populate_cache_on_start_without_active_networks_support(self):
|
||||
# emul dhcp driver that doesn't support retrieving of active networks
|
||||
|
@ -0,0 +1,17 @@
|
||||
---
|
||||
features:
|
||||
-
|
||||
A new config option ``resync_throttle`` has been added for Neutron DHCP
|
||||
agent.
|
||||
This new option allows to throttle the number of resync state events
|
||||
between the local DHCP state and Neutron to only once per
|
||||
``resync_throttle`` seconds.
|
||||
Default value for this new option is set to 1 and it should be configured
|
||||
per a user's specific scenario, i.e. how responsive the user would like
|
||||
his/her system to be for those DHCP resync state events.
|
||||
The option is introduced together with the event driven periodic task for
|
||||
DHCP agents. This enhances the agent with a faster reaction on the resync
|
||||
request but ensuring a minimum interval taken between them to avoid too
|
||||
frequent resyncing.
|
||||
For more information see bug
|
||||
`1780370 <https://bugs.launchpad.net/neutron/+bug/1780370>`_.
|
Loading…
Reference in New Issue
Block a user