[OVN] Refuse to bind a port to a dead agent

Closes-bug: #1958501

Change-Id: Ia84410675d28002afc74368349c9b54f048f4f4d
This commit is contained in:
zhouhenglc 2022-01-20 15:09:32 +08:00
parent cddd2e5ffa
commit 8a55f09192
8 changed files with 98 additions and 94 deletions

View File

@ -254,3 +254,12 @@ class AgentCache:
for cls in NeutronAgent.types.values()} for cls in NeutronAgent.types.values()}
# Return the cached agents of agent_ids whose keys are in the cache # Return the cached agents of agent_ids whose keys are in the cache
return (self.agents[id_] for id_ in agent_ids & self.agents.keys()) return (self.agents[id_] for id_ in agent_ids & self.agents.keys())
def get_agents(self, filters=None):
    """Return the cached agents that satisfy every given filter.

    :param filters: Optional mapping of an ``as_dict()`` key to the
                    accepted value(s) for that key. A bare string is
                    treated as one accepted value, not as a container
                    of characters, so ``{'host': 'compute-1'}`` does
                    not also match an agent on ``compute-10``.
    :returns: List of agent objects (not dicts) whose ``as_dict()``
              values are all accepted; every cached agent when no
              filter is given.
    :raises KeyError: If a filter key is absent from ``as_dict()``.
    """
    # ``value in 'some-string'`` is a substring test, so wrap scalar
    # strings in a list to get an exact comparison instead.
    accepted = {
        key: [values] if isinstance(values, (str, bytes)) else values
        for key, values in (filters or {}).items()}
    matching = []
    for agent in self.agents.values():
        agent_dict = agent.as_dict()
        if all(agent_dict[key] in values
               for key, values in accepted.items()):
            matching.append(agent)
    return matching

View File

@ -959,8 +959,7 @@ class OVNMechanismDriver(api.MechanismDriver):
# OVN chassis information is needed to ensure a valid port bind. # OVN chassis information is needed to ensure a valid port bind.
# Collect port binding data and refuse binding if the OVN chassis # Collect port binding data and refuse binding if the OVN chassis
# cannot be found. # cannot be found or is dead.
chassis_physnets = []
try: try:
# The PortContext host property contains special handling that # The PortContext host property contains special handling that
# we need to take into account, thus passing both the port Dict # we need to take into account, thus passing both the port Dict
@ -969,14 +968,6 @@ class OVNMechanismDriver(api.MechanismDriver):
bind_host = self._ovn_client.determine_bind_host( bind_host = self._ovn_client.determine_bind_host(
port, port,
port_context=context) port_context=context)
datapath_type, iface_types, chassis_physnets = (
self.sb_ovn.get_chassis_data_for_ml2_bind_port(bind_host))
iface_types = iface_types.split(',') if iface_types else []
except RuntimeError:
LOG.debug('Refusing to bind port %(port_id)s due to '
'no OVN chassis for host: %(host)s',
{'port_id': port['id'], 'host': bind_host})
return
except n_exc.InvalidInput as e: except n_exc.InvalidInput as e:
# The port binding profile is validated both on port creation and # The port binding profile is validated both on port creation and
# update. The new rules apply to a VNIC type previously not # update. The new rules apply to a VNIC type previously not
@ -985,7 +976,23 @@ class OVNMechanismDriver(api.MechanismDriver):
LOG.error('Validation of binding profile unexpectedly failed ' LOG.error('Validation of binding profile unexpectedly failed '
'while attempting to bind port %s', port['id']) 'while attempting to bind port %s', port['id'])
raise e raise e
agents = n_agent.AgentCache().get_agents({'host': bind_host})
if not agents:
LOG.warning('Refusing to bind port %(port_id)s due to '
'no OVN chassis for host: %(host)s',
{'port_id': port['id'], 'host': bind_host})
return
agent = agents[0]
if not agent.alive:
LOG.warning("Refusing to bind port %(pid)s to dead agent: "
"%(agent)s", {'pid': context.current['id'],
'agent': agent})
return
chassis = agent.chassis
datapath_type = chassis.external_ids.get('datapath-type', '')
iface_types = chassis.external_ids.get('iface-types', '')
iface_types = iface_types.split(',') if iface_types else []
chassis_physnets = self.sb_ovn._get_chassis_physnets(chassis)
for segment_to_bind in context.segments_to_bind: for segment_to_bind in context.segments_to_bind:
network_type = segment_to_bind['network_type'] network_type = segment_to_bind['network_type']
segmentation_id = segment_to_bind['segmentation_id'] segmentation_id = segment_to_bind['segmentation_id']
@ -1296,12 +1303,8 @@ class OVNMechanismDriver(api.MechanismDriver):
def get_agents(self, context, filters=None, fields=None, _driver=None): def get_agents(self, context, filters=None, fields=None, _driver=None):
_driver.ping_all_chassis() _driver.ping_all_chassis()
filters = filters or {} filters = filters or {}
agent_list = [] agent_list = n_agent.AgentCache().get_agents(filters)
for agent in n_agent.AgentCache(): return [agent.as_dict() for agent in agent_list]
agent_dict = agent.as_dict()
if all(agent_dict[k] in v for k, v in filters.items()):
agent_list.append(agent_dict)
return agent_list
def get_agent(self, context, id, fields=None, _driver=None): def get_agent(self, context, id, fields=None, _driver=None):

View File

@ -658,16 +658,3 @@ class SbAPI(api.API, metaclass=abc.ABCMeta):
:param chassis_type: The type of chassis :param chassis_type: The type of chassis
:type chassis_type: string :type chassis_type: string
""" """
@abc.abstractmethod
def get_chassis_data_for_ml2_bind_port(self, hostname):
    """Return chassis data for ML2 port binding.

    :param hostname: The hostname of the chassis
    :type hostname: string
    :returns: Tuple containing the chassis datapath type,
              iface types and physical networks for the
              OVN bridge mappings.
    :raises: RuntimeError exception if an OVN chassis
             does not exist.
    """

View File

@ -895,17 +895,6 @@ class OvsdbSbOvnIdl(sb_impl_idl.OvnSbApiIdlImpl, Backend):
card_serial_number) card_serial_number)
raise RuntimeError(msg) raise RuntimeError(msg)
def get_chassis_data_for_ml2_bind_port(self, hostname):
    """Look up the chassis for ``hostname`` and return its binding data.

    :param hostname: Value matched against the ``hostname`` column of
                     the ``Chassis`` table.
    :returns: ``(datapath_type, iface_types, physnets)`` where the first
              two are read from the chassis ``external_ids`` (empty
              string when unset) and the last comes from
              ``_get_chassis_physnets``.
    :raises RuntimeError: If no chassis row matches ``hostname``.
    """
    try:
        find_cmd = self.db_find_rows(
            'Chassis', ('hostname', '=', hostname))
        # Only the first matching row is used.
        chassis = next(iter(find_cmd.execute(check_error=True)))
    except StopIteration:
        raise RuntimeError(
            _('Chassis with hostname %s does not exist') % hostname)
    datapath_type = chassis.external_ids.get('datapath-type', '')
    iface_types = chassis.external_ids.get('iface-types', '')
    return datapath_type, iface_types, self._get_chassis_physnets(chassis)
def get_metadata_port_network(self, network): def get_metadata_port_network(self, network):
# TODO(twilson) This function should really just take a Row/RowView # TODO(twilson) This function should really just take a Row/RowView
try: try:

View File

@ -83,13 +83,6 @@ class TestSbApi(BaseOvnIdlTest):
our_chassis = {c['name'] for c in self.data['chassis']} our_chassis = {c['name'] for c in self.data['chassis']}
self.assertLessEqual(our_chassis, chassis_list) self.assertLessEqual(our_chassis, chassis_list)
def test_get_chassis_data_for_ml2_bind_port(self):
    """The first fixture chassis yields empty types and both physnets."""
    first_chassis = self.data['chassis'][0]
    datapath_type, iface_types, physnets = (
        self.api.get_chassis_data_for_ml2_bind_port(
            first_chassis['hostname']))
    self.assertEqual('', datapath_type)
    self.assertEqual('', iface_types)
    # Order of the physical networks is not significant.
    self.assertCountEqual(physnets, ['private', 'public'])
def test_chassis_exists(self): def test_chassis_exists(self):
self.assertTrue(self.api.chassis_exists( self.assertTrue(self.api.chassis_exists(
self.data['chassis'][0]['hostname'])) self.data['chassis'][0]['hostname']))

View File

@ -175,9 +175,8 @@ class FakeOvsdbSbOvnIdl(object):
self.get_chassis_and_azs = mock.Mock() self.get_chassis_and_azs = mock.Mock()
self.get_chassis_and_azs.return_value = {} self.get_chassis_and_azs.return_value = {}
self.get_all_chassis = mock.Mock() self.get_all_chassis = mock.Mock()
self.get_chassis_data_for_ml2_bind_port = mock.Mock() self._get_chassis_physnets = mock.Mock()
self.get_chassis_data_for_ml2_bind_port.return_value = \ self._get_chassis_physnets.return_value = ['fake-physnet']
('fake', '', ['fake-physnet'])
self.get_chassis_and_physnets = mock.Mock() self.get_chassis_and_physnets = mock.Mock()
self.get_gateway_chassis_from_cms_options = mock.Mock() self.get_gateway_chassis_from_cms_options = mock.Mock()
self.is_col_present = mock.Mock() self.is_col_present = mock.Mock()

View File

@ -83,6 +83,52 @@ class MechDriverSetupBase:
self.mech_driver.sb_ovn = fakes.FakeOvsdbSbOvnIdl() self.mech_driver.sb_ovn = fakes.FakeOvsdbSbOvnIdl()
self.mech_driver._post_fork_event.set() self.mech_driver._post_fork_event.set()
self.mech_driver._ovn_client._qos_driver = mock.Mock() self.mech_driver._ovn_client._qos_driver = mock.Mock()
neutron_agent.AgentCache(self.mech_driver)
# Because AgentCache is a singleton and we get a new mech_driver each
# setUp(), override the AgentCache driver.
neutron_agent.AgentCache().driver = self.mech_driver
agent1 = self._add_agent('agent1')
neutron_agent.AgentCache().get_agents = mock.Mock()
neutron_agent.AgentCache().get_agents.return_value = [agent1]
def _add_chassis(self, nb_cfg, name=None):
chassis_private = mock.Mock()
chassis_private.nb_cfg = nb_cfg
chassis_private.uuid = uuid.uuid4()
chassis_private.name = name if name else str(uuid.uuid4())
return chassis_private
def _add_chassis_agent(self, nb_cfg, agent_type, chassis_private=None,
                       updated_at=None):
    """Register an agent of ``agent_type`` in the AgentCache.

    :param nb_cfg: Passed to ``_add_chassis`` when no chassis is given
                   and stored under the metadata sb_cfg key for
                   metadata agents.
    :param agent_type: Agent type handed to ``AgentCache().update``.
    :param chassis_private: Existing chassis mock to reuse; a new one is
                            created when falsy.
    :param updated_at: Optional datetime recorded (ISO formatted) under
                       the liveness-check external id and forwarded to
                       ``AgentCache().update``.
    :returns: The result of ``AgentCache().update``.
    """
    if not chassis_private:
        chassis_private = self._add_chassis(nb_cfg)
    # A Mock fabricates any attribute on access; remove the fabricated
    # ``nb_cfg_timestamp`` so the attribute reads as absent
    # (presumably what the agent code under test expects -- confirm).
    if hasattr(chassis_private, 'nb_cfg_timestamp') and isinstance(
            chassis_private.nb_cfg_timestamp, mock.Mock):
        del chassis_private.nb_cfg_timestamp

    external_ids = {}
    if updated_at:
        external_ids[ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY] = (
            datetime.datetime.isoformat(updated_at))
    if agent_type == ovn_const.OVN_METADATA_AGENT:
        external_ids[ovn_const.OVN_AGENT_METADATA_SB_CFG_KEY] = nb_cfg
        external_ids[ovn_const.OVN_AGENT_METADATA_ID_KEY] = str(
            uuid.uuid4())
    chassis_private.external_ids = external_ids
    # The same mock doubles as its own ``chassis`` reference.
    chassis_private.chassis = [chassis_private]
    agent_cache = neutron_agent.AgentCache()
    return agent_cache.update(agent_type, chassis_private, updated_at)
def _add_agent(self, name, alive=True):
    """Register an OVN controller agent in the AgentCache for tests.

    :param name: Name given to the mocked chassis backing the agent.
    :param alive: When falsy, the agent's ``updated_at`` is backdated to
                  one second more than ``agent_down_time`` ago and
                  ``nb_global.nb_cfg`` is set equal to the chassis
                  ``nb_cfg``; otherwise ``updated_at`` is now and
                  ``nb_global.nb_cfg`` is set two ahead of the chassis.
    :returns: The result of ``_add_chassis_agent`` for the new agent.
    """
    nb_cfg = 5
    now = timeutils.utcnow(with_timezone=True)
    if alive:
        updated_at = now
        self.mech_driver.nb_ovn.nb_global.nb_cfg = nb_cfg + 2
    else:
        # timedelta's first positional argument is *days*, while
        # agent_down_time is configured in seconds: name the unit so
        # the offset is "just past the down time", not months.
        updated_at = now - datetime.timedelta(
            seconds=cfg.CONF.agent_down_time + 1)
        self.mech_driver.nb_ovn.nb_global.nb_cfg = nb_cfg
    chassis = self._add_chassis(nb_cfg, name=name)
    return self._add_chassis_agent(
        nb_cfg, ovn_const.OVN_CONTROLLER_AGENT, chassis, updated_at)
class TestOVNMechanismDriverBase(MechDriverSetupBase, class TestOVNMechanismDriverBase(MechDriverSetupBase,
@ -113,10 +159,6 @@ class TestOVNMechanismDriverBase(MechDriverSetupBase,
cfg.CONF.set_override('ovsdb_connection_timeout', 30, group='ovn') cfg.CONF.set_override('ovsdb_connection_timeout', 30, group='ovn')
mock.patch.object(impl_idl_ovn.Backend, 'schema_helper').start() mock.patch.object(impl_idl_ovn.Backend, 'schema_helper').start()
super().setUp() super().setUp()
neutron_agent.AgentCache(self.mech_driver)
# Because AgentCache is a singleton and we get a new mech_driver each
# setUp(), override the AgentCache driver.
neutron_agent.AgentCache().driver = self.mech_driver
self.nb_ovn = self.mech_driver.nb_ovn self.nb_ovn = self.mech_driver.nb_ovn
self.sb_ovn = self.mech_driver.sb_ovn self.sb_ovn = self.mech_driver.sb_ovn
@ -1189,7 +1231,7 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase):
attrs={'binding:vnic_type': 'unknown'}).info() attrs={'binding:vnic_type': 'unknown'}).info()
fake_port_context = fakes.FakePortContext(fake_port, 'host', []) fake_port_context = fakes.FakePortContext(fake_port, 'host', [])
self.mech_driver.bind_port(fake_port_context) self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_not_called() neutron_agent.AgentCache().get_agents.assert_not_called()
fake_port_context.set_binding.assert_not_called() fake_port_context.set_binding.assert_not_called()
def _test_bind_port_failed(self, fake_segments): def _test_bind_port_failed(self, fake_segments):
@ -1198,13 +1240,12 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase):
fake_port_context = fakes.FakePortContext( fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments) fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context) self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( neutron_agent.AgentCache().get_agents.assert_called_once_with(
fake_host) {'host': fake_host})
fake_port_context.set_binding.assert_not_called() fake_port_context.set_binding.assert_not_called()
def test_bind_port_host_not_found(self): def test_bind_port_host_not_found(self):
self.sb_ovn.get_chassis_data_for_ml2_bind_port.side_effect = \ neutron_agent.AgentCache().get_agents.return_value = []
RuntimeError
self._test_bind_port_failed([]) self._test_bind_port_failed([])
def test_bind_port_no_segments_to_bind(self): def test_bind_port_no_segments_to_bind(self):
@ -1218,14 +1259,19 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase):
[fakes.FakeSegment.create_one_segment(attrs=segment_attrs).info()] [fakes.FakeSegment.create_one_segment(attrs=segment_attrs).info()]
self._test_bind_port_failed(fake_segments) self._test_bind_port_failed(fake_segments)
def test_bind_port_host_not_alive(self):
    """Binding is refused when the host's cached agent is not alive."""
    dead_agent = self._add_agent('agent_no_alive', False)
    agent_cache = neutron_agent.AgentCache()
    agent_cache.get_agents.return_value = [dead_agent]
    # NOTE(review): with an empty segment list set_binding presumably
    # cannot be reached whether or not the agent counts as alive;
    # consider passing a bindable segment -- confirm against bind_port.
    self._test_bind_port_failed([])
def _test_bind_port(self, fake_segments): def _test_bind_port(self, fake_segments):
fake_port = fakes.FakePort.create_one_port().info() fake_port = fakes.FakePort.create_one_port().info()
fake_host = 'host' fake_host = 'host'
fake_port_context = fakes.FakePortContext( fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments) fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context) self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( neutron_agent.AgentCache().get_agents.assert_called_once_with(
fake_host) {'host': fake_host})
fake_port_context.set_binding.assert_called_once_with( fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'], fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS, portbindings.VIF_TYPE_OVS,
@ -1241,8 +1287,8 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase):
fake_port_context = fakes.FakePortContext( fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments) fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context) self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( neutron_agent.AgentCache().get_agents.assert_called_once_with(
fake_host) {'host': fake_host})
fake_port_context.set_binding.assert_called_once_with( fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'], fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS, portbindings.VIF_TYPE_OVS,
@ -1271,8 +1317,8 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase):
fake_port_context = fakes.FakePortContext( fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments) fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context) self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( neutron_agent.AgentCache().get_agents.assert_called_once_with(
fake_smartnic_dpu) {'host': fake_smartnic_dpu})
fake_port_context.set_binding.assert_called_once_with( fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'], fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS, portbindings.VIF_TYPE_OVS,
@ -1292,8 +1338,8 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase):
fake_port_context = fakes.FakePortContext( fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments) fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context) self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( neutron_agent.AgentCache().get_agents.assert_called_once_with(
fake_host) {'host': fake_host})
fake_port_context.set_binding.assert_called_once_with( fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'], fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS, portbindings.VIF_TYPE_OVS,
@ -2072,32 +2118,6 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase):
self.assertEqual(1, mock_update_port.call_count) self.assertEqual(1, mock_update_port.call_count)
mock_notify_dhcp.assert_called_with(fake_port['id']) mock_notify_dhcp.assert_called_with(fake_port['id'])
def _add_chassis(self, nb_cfg):
chassis_private = mock.Mock()
chassis_private.nb_cfg = nb_cfg
chassis_private.uuid = uuid.uuid4()
chassis_private.name = str(uuid.uuid4())
return chassis_private
def _add_chassis_agent(self, nb_cfg, agent_type, chassis_private=None,
                       updated_at=None):
    """Register an agent of ``agent_type`` in the AgentCache.

    :param nb_cfg: Passed to ``_add_chassis`` when no chassis is given
                   and stored under the metadata sb_cfg key for
                   metadata agents.
    :param agent_type: Agent type handed to ``AgentCache().update``.
    :param chassis_private: Existing chassis mock to reuse; a new one is
                            created when falsy.
    :param updated_at: Optional datetime recorded (ISO formatted) under
                       the liveness-check external id and forwarded to
                       ``AgentCache().update``.
    :returns: The result of ``AgentCache().update``.
    """
    if not chassis_private:
        chassis_private = self._add_chassis(nb_cfg)
    # A Mock fabricates any attribute on access; remove the fabricated
    # ``nb_cfg_timestamp`` so the attribute reads as absent
    # (presumably what the agent code under test expects -- confirm).
    if hasattr(chassis_private, 'nb_cfg_timestamp') and isinstance(
            chassis_private.nb_cfg_timestamp, mock.Mock):
        del chassis_private.nb_cfg_timestamp

    ext_ids = {}
    if updated_at:
        ext_ids[ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY] = (
            datetime.datetime.isoformat(updated_at))
    if agent_type == ovn_const.OVN_METADATA_AGENT:
        ext_ids[ovn_const.OVN_AGENT_METADATA_SB_CFG_KEY] = nb_cfg
        ext_ids[ovn_const.OVN_AGENT_METADATA_ID_KEY] = str(uuid.uuid4())
    chassis_private.external_ids = ext_ids
    # The same mock doubles as its own ``chassis`` reference.
    chassis_private.chassis = [chassis_private]
    return neutron_agent.AgentCache().update(
        agent_type, chassis_private, updated_at)
def test_agent_alive_true(self): def test_agent_alive_true(self):
chassis_private = self._add_chassis(5) chassis_private = self._add_chassis(5)
for agent_type in (ovn_const.OVN_CONTROLLER_AGENT, for agent_type in (ovn_const.OVN_CONTROLLER_AGENT,

View File

@ -0,0 +1,4 @@
---
features:
- |
The OVN mechanism driver now refuses to bind a port to a host whose OVN agent is dead.