Check tap bridge timestamps to detect local changes
During a quick instance rebuild on Nova, it may remove a tap interface and then plug it in within the agent polling window. In this scenario the agent will not realize a device has changed and will therefore not ask the server for info an subsequently update its status. This will prevent the notification from being sent back to Nova that the vif plugging has finished so the VM will never resume state. This adds a new timestamp collection method to the common agent manager interface for devices that is used by the common agent loop to determine if a device has locally changed. The linux bridge implementation of it checks the timestamps on the tap interface's 'bridge' directory, which will change whenever the tap is added to bridge. Closes-Bug: #1531862 Change-Id: If172470e907848556b6a8aff13520f94245919bb
This commit is contained in:
parent
48649d83aa
commit
7afbd3a6b8
@ -40,6 +40,13 @@ def is_bridged_interface(interface):
|
||||
return os.path.exists(BRIDGE_PORT_FS_FOR_DEVICE % interface)
|
||||
|
||||
|
||||
def get_interface_bridged_time(interface):
|
||||
try:
|
||||
return os.stat(BRIDGE_PORT_FS_FOR_DEVICE % interface).st_mtime
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def get_bridge_names():
|
||||
return os.listdir(BRIDGE_FS)
|
||||
|
||||
|
@ -137,6 +137,16 @@ class CommonAgentManagerBase(object):
|
||||
:return: set -- the set of all devices e.g. ['tap1', 'tap2']
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_devices_modified_timestamps(self, devices):
|
||||
"""Get a dictionary of modified timestamps by device
|
||||
|
||||
The devices passed in are expected to be the same format that
|
||||
get_all_devices returns.
|
||||
|
||||
:return: dict -- A dictionary of timestamps keyed by device
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_extension_driver_type(self):
|
||||
"""Get the agent extension driver type.
|
||||
|
@ -320,6 +320,17 @@ class CommonAgentLoop(service.Service):
|
||||
self.mgr.delete_arp_spoofing_protection(devices)
|
||||
return resync
|
||||
|
||||
@staticmethod
|
||||
def _get_devices_locally_modified(timestamps, previous_timestamps):
|
||||
"""Returns devices with previous timestamps that do not match new.
|
||||
|
||||
If a device did not have a timestamp previously, it will not be
|
||||
returned because this means it is new.
|
||||
"""
|
||||
return {device for device, timestamp in timestamps.items()
|
||||
if previous_timestamps.get(device) and
|
||||
timestamp != previous_timestamps.get(device)}
|
||||
|
||||
def scan_devices(self, previous, sync):
|
||||
device_info = {}
|
||||
|
||||
@ -333,12 +344,26 @@ class CommonAgentLoop(service.Service):
|
||||
previous = {'added': set(),
|
||||
'current': set(),
|
||||
'updated': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
# clear any orphaned ARP spoofing rules (e.g. interface was
|
||||
# manually deleted)
|
||||
if self.prevent_arp_spoofing:
|
||||
self.mgr.delete_unreferenced_arp_protection(current_devices)
|
||||
|
||||
# check to see if any devices were locally modified based on their
|
||||
# timestamps changing since the previous iteration. If a timestamp
|
||||
# doesn't exist for a device, this calculation is skipped for that
|
||||
# device.
|
||||
device_info['timestamps'] = self.mgr.get_devices_modified_timestamps(
|
||||
current_devices)
|
||||
locally_updated = self._get_devices_locally_modified(
|
||||
device_info['timestamps'], previous['timestamps'])
|
||||
if locally_updated:
|
||||
LOG.debug("Adding locally changed devices to updated set: %s",
|
||||
locally_updated)
|
||||
updated_devices |= locally_updated
|
||||
|
||||
if sync:
|
||||
# This is the first iteration, or the previous one had a problem.
|
||||
# Re-add all existing devices.
|
||||
|
@ -551,6 +551,9 @@ class LinuxBridgeManager(amb.CommonAgentManagerBase):
|
||||
device.link.delete()
|
||||
LOG.debug("Done deleting interface %s", interface)
|
||||
|
||||
def get_devices_modified_timestamps(self, devices):
|
||||
return {d: bridge_lib.get_interface_bridged_time(d) for d in devices}
|
||||
|
||||
def get_all_devices(self):
|
||||
devices = set()
|
||||
for device in bridge_lib.get_bridge_names():
|
||||
|
@ -121,6 +121,11 @@ class MacvtapManager(amb.CommonAgentManagerBase):
|
||||
"Agent terminated!"))
|
||||
sys.exit(1)
|
||||
|
||||
def get_devices_modified_timestamps(self, devices):
|
||||
# TODO(kevinbenton): this should be implemented to detect
|
||||
# rapid Nova instance rebuilds.
|
||||
return {}
|
||||
|
||||
def get_all_devices(self):
|
||||
devices = set()
|
||||
all_device_names = os.listdir(MACVTAP_FS)
|
||||
|
@ -42,6 +42,16 @@ class BridgeLibTestCase(base.BaseSudoTestCase):
|
||||
def test_get_bridge_names(self):
|
||||
self.assertIn(self.bridge.name, bridge_lib.get_bridge_names())
|
||||
|
||||
def test_get_interface_bridged_time(self):
|
||||
port = self.port_fixture.br_port
|
||||
t1 = bridge_lib.get_interface_bridged_time(port)
|
||||
self.bridge.delif(port)
|
||||
self.bridge.addif(port)
|
||||
t2 = bridge_lib.get_interface_bridged_time(port)
|
||||
self.assertIsNotNone(t1)
|
||||
self.assertIsNotNone(t2)
|
||||
self.assertGreater(t2, t1)
|
||||
|
||||
def test_get_interface_bridge(self):
|
||||
bridge = bridge_lib.BridgeDevice.get_interface_bridge(
|
||||
self.port_fixture.br_port.name)
|
||||
|
@ -149,10 +149,22 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
agent.treat_devices_removed(devices)
|
||||
de_arp.assert_called_with(devices)
|
||||
|
||||
def test__get_devices_locally_modified(self):
|
||||
new_ts = {1: 1000, 2: 2000, 3: 3000}
|
||||
old_ts = {1: 10, 2: 2000, 4: 900}
|
||||
# 3 and 4 are not returned because 3 is a new device and 4 is a
|
||||
# removed device
|
||||
self.assertEqual(
|
||||
set([1]),
|
||||
self.agent._get_devices_locally_modified(new_ts, old_ts))
|
||||
|
||||
def _test_scan_devices(self, previous, updated,
|
||||
fake_current, expected, sync):
|
||||
fake_current, expected, sync,
|
||||
fake_ts_current=None):
|
||||
self.agent.mgr = mock.Mock()
|
||||
self.agent.mgr.get_all_devices.return_value = fake_current
|
||||
self.agent.mgr.get_devices_modified_timestamps.return_value = (
|
||||
fake_ts_current or {})
|
||||
|
||||
self.agent.rpc_callbacks.get_and_clear_updated_devices.return_value =\
|
||||
updated
|
||||
@ -163,28 +175,49 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
previous = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
fake_current = set([1, 2])
|
||||
updated = set()
|
||||
expected = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=False)
|
||||
|
||||
def test_scan_devices_timestamp_triggers_updated(self):
|
||||
previous = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set(),
|
||||
'timestamps': {2: 600}}
|
||||
fake_current = set([1, 2])
|
||||
updated = set()
|
||||
expected = {'current': set([1, 2]),
|
||||
'updated': set([2]),
|
||||
'added': set(),
|
||||
'removed': set(),
|
||||
'timestamps': {2: 1000}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=False, fake_ts_current={2: 1000})
|
||||
|
||||
def test_scan_devices_added_removed(self):
|
||||
previous = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
fake_current = set([2, 3])
|
||||
updated = set()
|
||||
expected = {'current': set([2, 3]),
|
||||
'updated': set(),
|
||||
'added': set([3]),
|
||||
'removed': set([1])}
|
||||
'removed': set([1]),
|
||||
'timestamps': {}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=False)
|
||||
@ -193,13 +226,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
previous = {'current': set([2, 3]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set([1])}
|
||||
'removed': set([1]),
|
||||
'timestamps': {}}
|
||||
fake_current = set([2, 3])
|
||||
updated = set()
|
||||
expected = {'current': set([2, 3]),
|
||||
'updated': set(),
|
||||
'added': set([2, 3]),
|
||||
'removed': set([1])}
|
||||
'removed': set([1]),
|
||||
'timestamps': {}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=True)
|
||||
@ -208,7 +243,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
previous = {'current': set([2, 3]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set([1])}
|
||||
'removed': set([1]),
|
||||
'timestamps': {}}
|
||||
# Device 2 disappeared.
|
||||
fake_current = set([3])
|
||||
updated = set()
|
||||
@ -216,7 +252,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
expected = {'current': set([3]),
|
||||
'updated': set(),
|
||||
'added': set([3]),
|
||||
'removed': set([1, 2])}
|
||||
'removed': set([1, 2]),
|
||||
'timestamps': {}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=True)
|
||||
@ -225,13 +262,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
previous = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
fake_current = set([1, 2])
|
||||
updated = set([1])
|
||||
expected = {'current': set([1, 2]),
|
||||
'updated': set([1]),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=False)
|
||||
@ -240,13 +279,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
previous = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
fake_current = set([1, 2])
|
||||
updated = set([3])
|
||||
expected = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=False)
|
||||
@ -256,7 +297,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set()
|
||||
'removed': set(),
|
||||
'timestamps': {}
|
||||
}
|
||||
# Device 2 disappeared.
|
||||
fake_current = set([1])
|
||||
@ -266,7 +308,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
'current': set([1]),
|
||||
'updated': set(),
|
||||
'added': set(),
|
||||
'removed': set([2])
|
||||
'removed': set([2]),
|
||||
'timestamps': {}
|
||||
}
|
||||
self._test_scan_devices(
|
||||
previous, updated, fake_current, expected, sync=False
|
||||
@ -276,13 +319,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
previous = {'current': set([1, 2]),
|
||||
'updated': set([1]),
|
||||
'added': set(),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
fake_current = set([1, 2])
|
||||
updated = set([2])
|
||||
expected = {'current': set([1, 2]),
|
||||
'updated': set([1, 2]),
|
||||
'added': set([1, 2]),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=True)
|
||||
@ -295,7 +340,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
|
||||
expected = {'current': set([1, 2]),
|
||||
'updated': set(),
|
||||
'added': set([1, 2]),
|
||||
'removed': set()}
|
||||
'removed': set(),
|
||||
'timestamps': {}}
|
||||
self._test_scan_devices(previous, updated, fake_current, expected,
|
||||
sync=False)
|
||||
self.agent.mgr.delete_unreferenced_arp_protection.assert_called_with(
|
||||
|
Loading…
Reference in New Issue
Block a user