From 7afbd3a6b896d6d1285343f31517e9a656cfa606 Mon Sep 17 00:00:00 2001 From: Kevin Benton Date: Wed, 9 Mar 2016 23:47:35 -0800 Subject: [PATCH] Check tap bridge timestamps to detect local changes During a quick instance rebuild on Nova, it may remove a tap interface and then plug it in within the agent polling window. In this scenario the agent will not realize a device has changed and will therefore not ask the server for info an subsequently update its status. This will prevent the notification from being sent back to Nova that the vif plugging has finished so the VM will never resume state. This adds a new timestamp collection method to the common agent manager interface for devices that is used by the common agent loop to determine if a device has locally changed. The linux bridge implementation of it checks the timestamps on the tap interface's 'bridge' directory, which will change whenever the tap is added to bridge. Closes-Bug: #1531862 Change-Id: If172470e907848556b6a8aff13520f94245919bb --- neutron/agent/linux/bridge_lib.py | 7 ++ .../ml2/drivers/agent/_agent_manager_base.py | 10 +++ .../ml2/drivers/agent/_common_agent.py | 27 +++++- .../agent/linuxbridge_neutron_agent.py | 3 + .../macvtap/agent/macvtap_neutron_agent.py | 5 ++ .../functional/agent/linux/test_bridge_lib.py | 10 +++ .../ml2/drivers/agent/test__common_agent.py | 82 +++++++++++++++---- 7 files changed, 125 insertions(+), 19 deletions(-) diff --git a/neutron/agent/linux/bridge_lib.py b/neutron/agent/linux/bridge_lib.py index 96e92a7f027..a4f231d6fd2 100644 --- a/neutron/agent/linux/bridge_lib.py +++ b/neutron/agent/linux/bridge_lib.py @@ -40,6 +40,13 @@ def is_bridged_interface(interface): return os.path.exists(BRIDGE_PORT_FS_FOR_DEVICE % interface) +def get_interface_bridged_time(interface): + try: + return os.stat(BRIDGE_PORT_FS_FOR_DEVICE % interface).st_mtime + except OSError: + pass + + def get_bridge_names(): return os.listdir(BRIDGE_FS) diff --git a/neutron/plugins/ml2/drivers/agent/_agent_manager_base.py b/neutron/plugins/ml2/drivers/agent/_agent_manager_base.py index 6a149263ed4..612668f5ab2 100644 --- a/neutron/plugins/ml2/drivers/agent/_agent_manager_base.py +++ b/neutron/plugins/ml2/drivers/agent/_agent_manager_base.py @@ -137,6 +137,16 @@ class CommonAgentManagerBase(object): :return: set -- the set of all devices e.g. ['tap1', 'tap2'] """ + @abc.abstractmethod + def get_devices_modified_timestamps(self, devices): + """Get a dictionary of modified timestamps by device + + The devices passed in are expected to be the same format that + get_all_devices returns. + + :return: dict -- A dictionary of timestamps keyed by device + """ + @abc.abstractmethod def get_extension_driver_type(self): """Get the agent extension driver type. diff --git a/neutron/plugins/ml2/drivers/agent/_common_agent.py b/neutron/plugins/ml2/drivers/agent/_common_agent.py index c94fef48ed6..39626c0a8b6 100644 --- a/neutron/plugins/ml2/drivers/agent/_common_agent.py +++ b/neutron/plugins/ml2/drivers/agent/_common_agent.py @@ -320,6 +320,17 @@ class CommonAgentLoop(service.Service): self.mgr.delete_arp_spoofing_protection(devices) return resync + @staticmethod + def _get_devices_locally_modified(timestamps, previous_timestamps): + """Returns devices with previous timestamps that do not match new. + + If a device did not have a timestamp previously, it will not be + returned because this means it is new. + """ + return {device for device, timestamp in timestamps.items() + if previous_timestamps.get(device) and + timestamp != previous_timestamps.get(device)} + def scan_devices(self, previous, sync): device_info = {} @@ -333,12 +344,26 @@ class CommonAgentLoop(service.Service): previous = {'added': set(), 'current': set(), 'updated': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} # clear any orphaned ARP spoofing rules (e.g. interface was # manually deleted) if self.prevent_arp_spoofing: self.mgr.delete_unreferenced_arp_protection(current_devices) + # check to see if any devices were locally modified based on their + # timestamps changing since the previous iteration. If a timestamp + # doesn't exist for a device, this calculation is skipped for that + # device. + device_info['timestamps'] = self.mgr.get_devices_modified_timestamps( + current_devices) + locally_updated = self._get_devices_locally_modified( + device_info['timestamps'], previous['timestamps']) + if locally_updated: + LOG.debug("Adding locally changed devices to updated set: %s", + locally_updated) + updated_devices |= locally_updated + if sync: # This is the first iteration, or the previous one had a problem. # Re-add all existing devices. diff --git a/neutron/plugins/ml2/drivers/linuxbridge/agent/linuxbridge_neutron_agent.py b/neutron/plugins/ml2/drivers/linuxbridge/agent/linuxbridge_neutron_agent.py index d46da26391a..c5d3e945900 100644 --- a/neutron/plugins/ml2/drivers/linuxbridge/agent/linuxbridge_neutron_agent.py +++ b/neutron/plugins/ml2/drivers/linuxbridge/agent/linuxbridge_neutron_agent.py @@ -551,6 +551,9 @@ class LinuxBridgeManager(amb.CommonAgentManagerBase): device.link.delete() LOG.debug("Done deleting interface %s", interface) + def get_devices_modified_timestamps(self, devices): + return {d: bridge_lib.get_interface_bridged_time(d) for d in devices} + def get_all_devices(self): devices = set() for device in bridge_lib.get_bridge_names(): diff --git a/neutron/plugins/ml2/drivers/macvtap/agent/macvtap_neutron_agent.py b/neutron/plugins/ml2/drivers/macvtap/agent/macvtap_neutron_agent.py index 11987b5c2e3..37405d17bcb 100644 --- a/neutron/plugins/ml2/drivers/macvtap/agent/macvtap_neutron_agent.py +++ b/neutron/plugins/ml2/drivers/macvtap/agent/macvtap_neutron_agent.py @@ -121,6 +121,11 @@ class MacvtapManager(amb.CommonAgentManagerBase): "Agent terminated!")) sys.exit(1) + def get_devices_modified_timestamps(self, devices): + # TODO(kevinbenton): this should be implemented to detect + # rapid Nova instance rebuilds. + return {} + def get_all_devices(self): devices = set() all_device_names = os.listdir(MACVTAP_FS) diff --git a/neutron/tests/functional/agent/linux/test_bridge_lib.py b/neutron/tests/functional/agent/linux/test_bridge_lib.py index 5eb90d12c8d..4d561de5fea 100644 --- a/neutron/tests/functional/agent/linux/test_bridge_lib.py +++ b/neutron/tests/functional/agent/linux/test_bridge_lib.py @@ -42,6 +42,16 @@ class BridgeLibTestCase(base.BaseSudoTestCase): def test_get_bridge_names(self): self.assertIn(self.bridge.name, bridge_lib.get_bridge_names()) + def test_get_interface_bridged_time(self): + port = self.port_fixture.br_port + t1 = bridge_lib.get_interface_bridged_time(port) + self.bridge.delif(port) + self.bridge.addif(port) + t2 = bridge_lib.get_interface_bridged_time(port) + self.assertIsNotNone(t1) + self.assertIsNotNone(t2) + self.assertGreater(t2, t1) + def test_get_interface_bridge(self): bridge = bridge_lib.BridgeDevice.get_interface_bridge( self.port_fixture.br_port.name) diff --git a/neutron/tests/unit/plugins/ml2/drivers/agent/test__common_agent.py b/neutron/tests/unit/plugins/ml2/drivers/agent/test__common_agent.py index abd3c8b7080..ebee051de4e 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/agent/test__common_agent.py +++ b/neutron/tests/unit/plugins/ml2/drivers/agent/test__common_agent.py @@ -149,10 +149,22 @@ class TestCommonAgentLoop(base.BaseTestCase): agent.treat_devices_removed(devices) de_arp.assert_called_with(devices) + def test__get_devices_locally_modified(self): + new_ts = {1: 1000, 2: 2000, 3: 3000} + old_ts = {1: 10, 2: 2000, 4: 900} + # 3 and 4 are not returned because 3 is a new device and 4 is a + # removed device + self.assertEqual( + set([1]), + self.agent._get_devices_locally_modified(new_ts, old_ts)) + def _test_scan_devices(self, previous, updated, - fake_current, expected, sync): + fake_current, expected, sync, + fake_ts_current=None): self.agent.mgr = mock.Mock() self.agent.mgr.get_all_devices.return_value = fake_current + self.agent.mgr.get_devices_modified_timestamps.return_value = ( + fake_ts_current or {}) self.agent.rpc_callbacks.get_and_clear_updated_devices.return_value =\ updated @@ -163,28 +175,49 @@ class TestCommonAgentLoop(base.BaseTestCase): previous = {'current': set([1, 2]), 'updated': set(), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} fake_current = set([1, 2]) updated = set() expected = {'current': set([1, 2]), 'updated': set(), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=False) + def test_scan_devices_timestamp_triggers_updated(self): + previous = {'current': set([1, 2]), + 'updated': set(), + 'added': set(), + 'removed': set(), + 'timestamps': {2: 600}} + fake_current = set([1, 2]) + updated = set() + expected = {'current': set([1, 2]), + 'updated': set([2]), + 'added': set(), + 'removed': set(), + 'timestamps': {2: 1000}} + + self._test_scan_devices(previous, updated, fake_current, expected, + sync=False, fake_ts_current={2: 1000}) + def test_scan_devices_added_removed(self): previous = {'current': set([1, 2]), 'updated': set(), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} fake_current = set([2, 3]) updated = set() expected = {'current': set([2, 3]), 'updated': set(), 'added': set([3]), - 'removed': set([1])} + 'removed': set([1]), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=False) @@ -193,13 +226,15 @@ class TestCommonAgentLoop(base.BaseTestCase): previous = {'current': set([2, 3]), 'updated': set(), 'added': set(), - 'removed': set([1])} + 'removed': set([1]), + 'timestamps': {}} fake_current = set([2, 3]) updated = set() expected = {'current': set([2, 3]), 'updated': set(), 'added': set([2, 3]), - 'removed': set([1])} + 'removed': set([1]), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=True) @@ -208,7 +243,8 @@ class TestCommonAgentLoop(base.BaseTestCase): previous = {'current': set([2, 3]), 'updated': set(), 'added': set(), - 'removed': set([1])} + 'removed': set([1]), + 'timestamps': {}} # Device 2 disappeared. fake_current = set([3]) updated = set() @@ -216,7 +252,8 @@ class TestCommonAgentLoop(base.BaseTestCase): expected = {'current': set([3]), 'updated': set(), 'added': set([3]), - 'removed': set([1, 2])} + 'removed': set([1, 2]), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=True) @@ -225,13 +262,15 @@ class TestCommonAgentLoop(base.BaseTestCase): previous = {'current': set([1, 2]), 'updated': set(), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} fake_current = set([1, 2]) updated = set([1]) expected = {'current': set([1, 2]), 'updated': set([1]), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=False) @@ -240,13 +279,15 @@ class TestCommonAgentLoop(base.BaseTestCase): previous = {'current': set([1, 2]), 'updated': set(), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} fake_current = set([1, 2]) updated = set([3]) expected = {'current': set([1, 2]), 'updated': set(), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=False) @@ -256,7 +297,8 @@ class TestCommonAgentLoop(base.BaseTestCase): 'current': set([1, 2]), 'updated': set(), 'added': set(), - 'removed': set() + 'removed': set(), + 'timestamps': {} } # Device 2 disappeared. fake_current = set([1]) @@ -266,7 +308,8 @@ class TestCommonAgentLoop(base.BaseTestCase): 'current': set([1]), 'updated': set(), 'added': set(), - 'removed': set([2]) + 'removed': set([2]), + 'timestamps': {} } self._test_scan_devices( previous, updated, fake_current, expected, sync=False @@ -276,13 +319,15 @@ class TestCommonAgentLoop(base.BaseTestCase): previous = {'current': set([1, 2]), 'updated': set([1]), 'added': set(), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} fake_current = set([1, 2]) updated = set([2]) expected = {'current': set([1, 2]), 'updated': set([1, 2]), 'added': set([1, 2]), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=True) @@ -295,7 +340,8 @@ class TestCommonAgentLoop(base.BaseTestCase): expected = {'current': set([1, 2]), 'updated': set(), 'added': set([1, 2]), - 'removed': set()} + 'removed': set(), + 'timestamps': {}} self._test_scan_devices(previous, updated, fake_current, expected, sync=False) self.agent.mgr.delete_unreferenced_arp_protection.assert_called_with(