Event driven periodic resync task for DHCP agents

The DHCP agent resyncs its state with Neutron to recover from any
transient notification or RPC errors. Currently, the periodic resync
task waits on a timer to determine whether a resync is necessary. The
interval between attempts is 5 seconds by default and can be configured
to be longer. This may cause a potentially long delay before an agent
gets new work via an agent_updated RPC call.

The idea of this RFE is to change the timer-based periodic resync task
into an event-driven one. It also proposes a new DHCP agent config
option "resync_throttle" that enforces a minimum interval between
resync state events, to avoid resyncing too frequently. In this way,
the agent acts on a resync request immediately, which decreases the
time needed before DHCP services are available.

Co-authored-by: Allain Legacy <Allain.legacy@windriver.com>

Closes-Bug: #1780370
Change-Id: Ie9d758ba5f750a38dc19ea5ce8b2c6b414f9ef80
This commit is contained in:
Kailun Qin 2018-07-06 20:20:08 +08:00
parent b5fbac8ec8
commit 1d98f0a7d4
4 changed files with 96 additions and 10 deletions

View File

@ -15,6 +15,7 @@
import collections
import os
import threading
import eventlet
from neutron_lib.agent import constants as agent_consts
@ -86,6 +87,15 @@ class DhcpAgent(manager.Manager):
self.needs_resync_reasons = collections.defaultdict(list)
self.dhcp_ready_ports = set()
self.conf = conf or cfg.CONF
# If 'resync_throttle' is mistakenly configured to be greater than
# 'resync_interval', log the error message and raise an exception.
if self.conf.resync_throttle > self.conf.resync_interval:
msg = _("DHCP agent must have resync_throttle <= resync_interval")
LOG.exception(msg)
raise exceptions.InvalidConfigurationOption(
opt_name='resync_throttle',
opt_value=self.conf.resync_throttle)
self._periodic_resync_event = threading.Event()
self.cache = NetworkCache()
self.dhcp_driver_cls = importutils.import_class(self.conf.dhcp_driver)
self.plugin_rpc = DhcpPluginApi(topics.PLUGIN, self.conf.host)
@ -174,6 +184,12 @@ class DhcpAgent(manager.Manager):
specified, resync all networks.
"""
self.needs_resync_reasons[network_id].append(reason)
self._periodic_resync_event.set()
# Yield to allow other threads that may be ready to run.
# This helps prevent one thread from acquiring the same lock over and
# over again, in which case no other threads waiting on the
# "dhcp-agent" lock would make any progress.
eventlet.greenthread.sleep(0)
@_sync_lock
def sync_state(self, networks=None):
@ -243,9 +259,20 @@ class DhcpAgent(manager.Manager):
@utils.exception_logger()
def _periodic_resync_helper(self):
"""Resync the dhcp state at the configured interval."""
"""Resync the dhcp state at the configured interval and throttle."""
while True:
eventlet.sleep(self.conf.resync_interval)
# threading.Event.wait blocks until the internal flag is true. It
# returns the internal flag on exit, so it will always return True
# except if a timeout is given and the operation times out.
if self._periodic_resync_event.wait(self.conf.resync_interval):
LOG.debug("Resync event has been scheduled")
clear_periodic_resync_event = self._periodic_resync_event.clear
# configure throttler for clear_periodic_resync_event to
# introduce delays between resync state events.
throttled_clear_periodic_resync_event = utils.throttler(
self.conf.resync_throttle)(clear_periodic_resync_event)
throttled_clear_periodic_resync_event()
if self.needs_resync_reasons:
# be careful to avoid a race with additions to list
# from other threads

View File

@ -24,7 +24,17 @@ DHCP_AGENT_OPTS = [
cfg.IntOpt('resync_interval', default=5,
help=_("The DHCP agent will resync its state with Neutron to "
"recover from any transient notification or RPC errors. "
"The interval is number of seconds between attempts.")),
"The interval is maximum number of seconds between "
"attempts. The resync can be done more often based on "
"the events triggered.")),
cfg.IntOpt('resync_throttle', default=1,
help=_("Throttle the number of resync state events between the "
"local DHCP state and Neutron to only once per "
"'resync_throttle' seconds. The value of throttle "
"introduces a minimum interval between resync state "
"events. Otherwise the resync may end up in a "
"busy-loop. The value must be less than "
"resync_interval.")),
cfg.StrOpt('dhcp_driver',
default='neutron.agent.linux.dhcp.Dnsmasq',
help=_("The driver used to manage the DHCP server.")),

View File

@ -242,6 +242,26 @@ class TestDhcpAgent(base.BaseTestCase):
"IPWrapper")
self.mock_ip_wrapper = self.mock_ip_wrapper_p.start()
def test_init_resync_throttle_conf(self):
# Verify that DhcpAgent construction validates 'resync_throttle' against
# 'resync_interval': the defaults must be accepted and an oversized
# throttle must be rejected.
#
# Case 1: with the default options, building the agent must not raise
# InvalidConfigurationOption; if it does, fail with an explicit message.
try:
dhcp_agent.DhcpAgent(HOSTNAME)
except exceptions.InvalidConfigurationOption:
self.fail("DHCP agent initialization unexpectedly raised an "
"InvalidConfigurationOption exception. No exception is "
"expected with the default configurations.")
# default resync_interval = 5; default resync_throttle = 1
# Case 2: override the throttle to 10, which exceeds the default interval.
# NOTE(review): presumably the base test case resets this override on
# cleanup -- confirm, since no explicit clear_override is done here.
cfg.CONF.set_override('resync_throttle', 10)
# resync_throttle must be <= resync_interval, otherwise an
# InvalidConfigurationOption exception would be raised with log
# message.
# LOG.exception is patched so the logged message can be asserted below.
with mock.patch.object(dhcp_agent.LOG, 'exception') as log:
with testtools.ExpectedException(
exceptions.InvalidConfigurationOption):
dhcp_agent.DhcpAgent(HOSTNAME)
log.assert_any_call("DHCP agent must have resync_throttle <= "
"resync_interval")
def test_init_host(self):
dhcp = dhcp_agent.DhcpAgent(HOSTNAME)
with mock.patch.object(dhcp, 'sync_state') as sync_state:
@ -489,18 +509,30 @@ class TestDhcpAgent(base.BaseTestCase):
['Agent has just been revived'])
def test_periodic_resync_helper(self):
with mock.patch.object(dhcp_agent.eventlet, 'sleep') as sleep:
dhcp = dhcp_agent.DhcpAgent(HOSTNAME)
resync_reasons = collections.OrderedDict(
(('a', 'reason1'), ('b', 'reason2')))
dhcp.needs_resync_reasons = resync_reasons
with mock.patch.object(dhcp, 'sync_state') as sync_state:
sync_state.side_effect = RuntimeError
with testtools.ExpectedException(RuntimeError):
dhcp._periodic_resync_helper()
sync_state.assert_called_once_with(resync_reasons.keys())
self.assertEqual(0, len(dhcp.needs_resync_reasons))
def test_periodic_resync_helper_with_event(self):
with mock.patch.object(dhcp_agent.LOG, 'debug') as log:
dhcp = dhcp_agent.DhcpAgent(HOSTNAME)
resync_reasons = collections.OrderedDict(
(('a', 'reason1'), ('b', 'reason2')))
dhcp.needs_resync_reasons = resync_reasons
dhcp.schedule_resync('reason1', 'a')
dhcp.schedule_resync('reason1', 'b')
reasons = dhcp.needs_resync_reasons.keys()
with mock.patch.object(dhcp, 'sync_state') as sync_state:
sync_state.side_effect = RuntimeError
with testtools.ExpectedException(RuntimeError):
dhcp._periodic_resync_helper()
sync_state.assert_called_once_with(resync_reasons.keys())
sleep.assert_called_once_with(dhcp.conf.resync_interval)
self.assertEqual(0, len(dhcp.needs_resync_reasons))
log.assert_any_call("Resync event has been scheduled")
sync_state.assert_called_once_with(reasons)
self.assertEqual(0, len(dhcp.needs_resync_reasons))
def test_populate_cache_on_start_without_active_networks_support(self):
# emul dhcp driver that doesn't support retrieving of active networks

View File

@ -0,0 +1,17 @@
---
features:
-
A new config option ``resync_throttle`` has been added for Neutron DHCP
agent.
This new option limits resync state events between the local DHCP
state and Neutron to at most one per ``resync_throttle`` seconds.
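The default value of this new option is 1. It should be configured for
each deployment's specific scenario, i.e. how responsive the user would
like the system to be to those DHCP resync state events. The value must
not exceed ``resync_interval``; otherwise the DHCP agent raises an
``InvalidConfigurationOption`` error during initialization.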
The option is introduced together with the event-driven periodic resync
task for DHCP agents. This lets the agent react faster to resync
requests while ensuring a minimum interval between them to avoid
resyncing too frequently.
For more information see bug
`1780370 <https://bugs.launchpad.net/neutron/+bug/1780370>`_.