From 8a55f091925fd5e6742fb92783c524450843f5a0 Mon Sep 17 00:00:00 2001 From: zhouhenglc Date: Thu, 20 Jan 2022 15:09:32 +0800 Subject: [PATCH] [ovn]Refusing to bind port to dead agent Closes-bug: #1958501 Change-Id: Ia84410675d28002afc74368349c9b54f048f4f4d --- .../ml2/drivers/ovn/agent/neutron_agent.py | 9 ++ .../drivers/ovn/mech_driver/mech_driver.py | 37 +++--- .../ml2/drivers/ovn/mech_driver/ovsdb/api.py | 13 --- .../ovn/mech_driver/ovsdb/impl_idl_ovn.py | 11 -- .../ovn/mech_driver/ovsdb/test_impl_idl.py | 7 -- neutron/tests/unit/fake_resources.py | 5 +- .../ovn/mech_driver/test_mech_driver.py | 106 +++++++++++------- ...fuse-bind-dead-agent-2310f9f64c2a99de.yaml | 4 + 8 files changed, 98 insertions(+), 94 deletions(-) create mode 100644 releasenotes/notes/ovn-refuse-bind-dead-agent-2310f9f64c2a99de.yaml diff --git a/neutron/plugins/ml2/drivers/ovn/agent/neutron_agent.py b/neutron/plugins/ml2/drivers/ovn/agent/neutron_agent.py index 432e00420c5..6e3b6bfc07f 100644 --- a/neutron/plugins/ml2/drivers/ovn/agent/neutron_agent.py +++ b/neutron/plugins/ml2/drivers/ovn/agent/neutron_agent.py @@ -254,3 +254,12 @@ class AgentCache: for cls in NeutronAgent.types.values()} # Return the cached agents of agent_ids whose keys are in the cache return (self.agents[id_] for id_ in agent_ids & self.agents.keys()) + + def get_agents(self, filters=None): + filters = filters or {} + agent_list = [] + for agent in self.agents.values(): + agent_dict = agent.as_dict() + if all(agent_dict[k] in v for k, v in filters.items()): + agent_list.append(agent) + return agent_list diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py index 54c5261881e..2db013a9736 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py @@ -959,8 +959,7 @@ class OVNMechanismDriver(api.MechanismDriver): # OVN chassis information is needed to ensure a valid port bind. # Collect port binding data and refuse binding if the OVN chassis - # cannot be found. - chassis_physnets = [] + # cannot be found or is dead. try: # The PortContext host property contains special handling that # we need to take into account, thus passing both the port Dict @@ -969,14 +968,6 @@ class OVNMechanismDriver(api.MechanismDriver): bind_host = self._ovn_client.determine_bind_host( port, port_context=context) - datapath_type, iface_types, chassis_physnets = ( - self.sb_ovn.get_chassis_data_for_ml2_bind_port(bind_host)) - iface_types = iface_types.split(',') if iface_types else [] - except RuntimeError: - LOG.debug('Refusing to bind port %(port_id)s due to ' - 'no OVN chassis for host: %(host)s', - {'port_id': port['id'], 'host': bind_host}) - return except n_exc.InvalidInput as e: # The port binding profile is validated both on port creation and # update. The new rules apply to a VNIC type previously not @@ -985,7 +976,23 @@ class OVNMechanismDriver(api.MechanismDriver): LOG.error('Validation of binding profile unexpectedly failed ' 'while attempting to bind port %s', port['id']) raise e - + agents = n_agent.AgentCache().get_agents({'host': bind_host}) + if not agents: + LOG.warning('Refusing to bind port %(port_id)s due to ' + 'no OVN chassis for host: %(host)s', + {'port_id': port['id'], 'host': bind_host}) + return + agent = agents[0] + if not agent.alive: + LOG.warning("Refusing to bind port %(pid)s to dead agent: " + "%(agent)s", {'pid': context.current['id'], + 'agent': agent}) + return + chassis = agent.chassis + datapath_type = chassis.external_ids.get('datapath-type', '') + iface_types = chassis.external_ids.get('iface-types', '') + iface_types = iface_types.split(',') if iface_types else [] + chassis_physnets = self.sb_ovn._get_chassis_physnets(chassis) for segment_to_bind in context.segments_to_bind: network_type = segment_to_bind['network_type'] segmentation_id = segment_to_bind['segmentation_id'] @@ -1296,12 +1303,8 @@ class OVNMechanismDriver(api.MechanismDriver): def get_agents(self, context, filters=None, fields=None, _driver=None): _driver.ping_all_chassis() filters = filters or {} - agent_list = [] - for agent in n_agent.AgentCache(): - agent_dict = agent.as_dict() - if all(agent_dict[k] in v for k, v in filters.items()): - agent_list.append(agent_dict) - return agent_list + agent_list = n_agent.AgentCache().get_agents(filters) + return [agent.as_dict() for agent in agent_list] def get_agent(self, context, id, fields=None, _driver=None): diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/api.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/api.py index a77d5baf411..c172b9e4d22 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/api.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/api.py @@ -658,16 +658,3 @@ class SbAPI(api.API, metaclass=abc.ABCMeta): :param chassis_type: The type of chassis :type chassis_type: string """ - - @abc.abstractmethod - def get_chassis_data_for_ml2_bind_port(self, hostname): - """Return chassis data for ML2 port binding. - - @param hostname: The hostname of the chassis - @type hostname: string - :returns: Tuple containing the chassis datapath type, - iface types and physical networks for the - OVN bridge mappings. - :raises: RuntimeError exception if an OVN chassis - does not exist. - """ diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/impl_idl_ovn.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/impl_idl_ovn.py index 835635c7647..c24aa2acef6 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/impl_idl_ovn.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/impl_idl_ovn.py @@ -895,17 +895,6 @@ class OvsdbSbOvnIdl(sb_impl_idl.OvnSbApiIdlImpl, Backend): card_serial_number) raise RuntimeError(msg) - def get_chassis_data_for_ml2_bind_port(self, hostname): - try: - cmd = self.db_find_rows('Chassis', ('hostname', '=', hostname)) - chassis = next(c for c in cmd.execute(check_error=True)) - except StopIteration: - msg = _('Chassis with hostname %s does not exist') % hostname - raise RuntimeError(msg) - return (chassis.external_ids.get('datapath-type', ''), - chassis.external_ids.get('iface-types', ''), - self._get_chassis_physnets(chassis)) - def get_metadata_port_network(self, network): # TODO(twilson) This function should really just take a Row/RowView try: diff --git a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_impl_idl.py b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_impl_idl.py index 17b5fef250a..cc5718713b5 100644 --- a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_impl_idl.py +++ b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_impl_idl.py @@ -83,13 +83,6 @@ class TestSbApi(BaseOvnIdlTest): our_chassis = {c['name'] for c in self.data['chassis']} self.assertLessEqual(our_chassis, chassis_list) - def test_get_chassis_data_for_ml2_bind_port(self): - host = self.data['chassis'][0]['hostname'] - dp, iface, phys = self.api.get_chassis_data_for_ml2_bind_port(host) - self.assertEqual('', dp) - self.assertEqual('', iface) - self.assertCountEqual(phys, ['private', 'public']) - def test_chassis_exists(self): self.assertTrue(self.api.chassis_exists( self.data['chassis'][0]['hostname'])) diff --git a/neutron/tests/unit/fake_resources.py b/neutron/tests/unit/fake_resources.py index c46b30a32b7..0b379d2014a 100644 --- a/neutron/tests/unit/fake_resources.py +++ b/neutron/tests/unit/fake_resources.py @@ -175,9 +175,8 @@ class FakeOvsdbSbOvnIdl(object): self.get_chassis_and_azs = mock.Mock() self.get_chassis_and_azs.return_value = {} self.get_all_chassis = mock.Mock() - self.get_chassis_data_for_ml2_bind_port = mock.Mock() - self.get_chassis_data_for_ml2_bind_port.return_value = \ - ('fake', '', ['fake-physnet']) + self._get_chassis_physnets = mock.Mock() + self._get_chassis_physnets.return_value = ['fake-physnet'] self.get_chassis_and_physnets = mock.Mock() self.get_gateway_chassis_from_cms_options = mock.Mock() self.is_col_present = mock.Mock() diff --git a/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py b/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py index 112d5b65057..45235f74f71 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py +++ b/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py @@ -83,6 +83,52 @@ class MechDriverSetupBase: self.mech_driver.sb_ovn = fakes.FakeOvsdbSbOvnIdl() self.mech_driver._post_fork_event.set() self.mech_driver._ovn_client._qos_driver = mock.Mock() + neutron_agent.AgentCache(self.mech_driver) + # Because AgentCache is a singleton and we get a new mech_driver each + # setUp(), override the AgentCache driver. + neutron_agent.AgentCache().driver = self.mech_driver + agent1 = self._add_agent('agent1') + neutron_agent.AgentCache().get_agents = mock.Mock() + neutron_agent.AgentCache().get_agents.return_value = [agent1] + + def _add_chassis(self, nb_cfg, name=None): + chassis_private = mock.Mock() + chassis_private.nb_cfg = nb_cfg + chassis_private.uuid = uuid.uuid4() + chassis_private.name = name if name else str(uuid.uuid4()) + return chassis_private + + def _add_chassis_agent(self, nb_cfg, agent_type, chassis_private=None, + updated_at=None): + chassis_private = chassis_private or self._add_chassis(nb_cfg) + if hasattr(chassis_private, 'nb_cfg_timestamp') and isinstance( + chassis_private.nb_cfg_timestamp, mock.Mock): + del chassis_private.nb_cfg_timestamp + chassis_private.external_ids = {} + if updated_at: + chassis_private.external_ids = { + ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY: + datetime.datetime.isoformat(updated_at)} + if agent_type == ovn_const.OVN_METADATA_AGENT: + chassis_private.external_ids.update({ + ovn_const.OVN_AGENT_METADATA_SB_CFG_KEY: nb_cfg, + ovn_const.OVN_AGENT_METADATA_ID_KEY: str(uuid.uuid4())}) + chassis_private.chassis = [chassis_private] + return neutron_agent.AgentCache().update(agent_type, chassis_private, + updated_at) + + def _add_agent(self, name, alive=True): + nb_cfg = 5 + now = timeutils.utcnow(with_timezone=True) + if not alive: + updated_at = now - datetime.timedelta(cfg.CONF.agent_down_time + 1) + self.mech_driver.nb_ovn.nb_global.nb_cfg = nb_cfg + else: + updated_at = now + self.mech_driver.nb_ovn.nb_global.nb_cfg = nb_cfg + 2 + chassis = self._add_chassis(nb_cfg, name=name) + return self._add_chassis_agent( + nb_cfg, ovn_const.OVN_CONTROLLER_AGENT, chassis, updated_at) class TestOVNMechanismDriverBase(MechDriverSetupBase, @@ -113,10 +159,6 @@ class TestOVNMechanismDriverBase(MechDriverSetupBase, cfg.CONF.set_override('ovsdb_connection_timeout', 30, group='ovn') mock.patch.object(impl_idl_ovn.Backend, 'schema_helper').start() super().setUp() - neutron_agent.AgentCache(self.mech_driver) - # Because AgentCache is a singleton and we get a new mech_driver each - # setUp(), override the AgentCache driver. - neutron_agent.AgentCache().driver = self.mech_driver self.nb_ovn = self.mech_driver.nb_ovn self.sb_ovn = self.mech_driver.sb_ovn @@ -1189,7 +1231,7 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase): attrs={'binding:vnic_type': 'unknown'}).info() fake_port_context = fakes.FakePortContext(fake_port, 'host', []) self.mech_driver.bind_port(fake_port_context) - self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_not_called() + neutron_agent.AgentCache().get_agents.assert_not_called() fake_port_context.set_binding.assert_not_called() def _test_bind_port_failed(self, fake_segments): @@ -1198,13 +1240,12 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase): fake_port_context = fakes.FakePortContext( fake_port, fake_host, fake_segments) self.mech_driver.bind_port(fake_port_context) - self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( - fake_host) + neutron_agent.AgentCache().get_agents.assert_called_once_with( + {'host': fake_host}) fake_port_context.set_binding.assert_not_called() def test_bind_port_host_not_found(self): - self.sb_ovn.get_chassis_data_for_ml2_bind_port.side_effect = \ - RuntimeError + neutron_agent.AgentCache().get_agents.return_value = [] self._test_bind_port_failed([]) def test_bind_port_no_segments_to_bind(self): @@ -1218,14 +1259,19 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase): [fakes.FakeSegment.create_one_segment(attrs=segment_attrs).info()] self._test_bind_port_failed(fake_segments) + def test_bind_port_host_not_alive(self): + agent = self._add_agent('agent_no_alive', False) + neutron_agent.AgentCache().get_agents.return_value = [agent] + self._test_bind_port_failed([]) + def _test_bind_port(self, fake_segments): fake_port = fakes.FakePort.create_one_port().info() fake_host = 'host' fake_port_context = fakes.FakePortContext( fake_port, fake_host, fake_segments) self.mech_driver.bind_port(fake_port_context) - self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( - fake_host) + neutron_agent.AgentCache().get_agents.assert_called_once_with( + {'host': fake_host}) fake_port_context.set_binding.assert_called_once_with( fake_segments[0]['id'], portbindings.VIF_TYPE_OVS, @@ -1241,8 +1287,8 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase): fake_port_context = fakes.FakePortContext( fake_port, fake_host, fake_segments) self.mech_driver.bind_port(fake_port_context) - self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( - fake_host) + neutron_agent.AgentCache().get_agents.assert_called_once_with( + {'host': fake_host}) fake_port_context.set_binding.assert_called_once_with( fake_segments[0]['id'], portbindings.VIF_TYPE_OVS, @@ -1271,8 +1317,8 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase): fake_port_context = fakes.FakePortContext( fake_port, fake_host, fake_segments) self.mech_driver.bind_port(fake_port_context) - self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( - fake_smartnic_dpu) + neutron_agent.AgentCache().get_agents.assert_called_once_with( + {'host': fake_smartnic_dpu}) fake_port_context.set_binding.assert_called_once_with( fake_segments[0]['id'], portbindings.VIF_TYPE_OVS, @@ -1292,8 +1338,8 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase): fake_port_context = fakes.FakePortContext( fake_port, fake_host, fake_segments) self.mech_driver.bind_port(fake_port_context) - self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with( - fake_host) + neutron_agent.AgentCache().get_agents.assert_called_once_with( + {'host': fake_host}) fake_port_context.set_binding.assert_called_once_with( fake_segments[0]['id'], portbindings.VIF_TYPE_OVS, @@ -2072,32 +2118,6 @@ class TestOVNMechanismDriver(TestOVNMechanismDriverBase): self.assertEqual(1, mock_update_port.call_count) mock_notify_dhcp.assert_called_with(fake_port['id']) - def _add_chassis(self, nb_cfg): - chassis_private = mock.Mock() - chassis_private.nb_cfg = nb_cfg - chassis_private.uuid = uuid.uuid4() - chassis_private.name = str(uuid.uuid4()) - return chassis_private - - def _add_chassis_agent(self, nb_cfg, agent_type, chassis_private=None, - updated_at=None): - chassis_private = chassis_private or self._add_chassis(nb_cfg) - if hasattr(chassis_private, 'nb_cfg_timestamp') and isinstance( - chassis_private.nb_cfg_timestamp, mock.Mock): - del chassis_private.nb_cfg_timestamp - chassis_private.external_ids = {} - if updated_at: - chassis_private.external_ids[ - ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY] = \ - datetime.datetime.isoformat(updated_at) - if agent_type == ovn_const.OVN_METADATA_AGENT: - chassis_private.external_ids.update({ - ovn_const.OVN_AGENT_METADATA_SB_CFG_KEY: nb_cfg, - ovn_const.OVN_AGENT_METADATA_ID_KEY: str(uuid.uuid4())}) - chassis_private.chassis = [chassis_private] - return neutron_agent.AgentCache().update(agent_type, chassis_private, - updated_at) - def test_agent_alive_true(self): chassis_private = self._add_chassis(5) for agent_type in (ovn_const.OVN_CONTROLLER_AGENT, diff --git a/releasenotes/notes/ovn-refuse-bind-dead-agent-2310f9f64c2a99de.yaml b/releasenotes/notes/ovn-refuse-bind-dead-agent-2310f9f64c2a99de.yaml new file mode 100644 index 00000000000..4bb77a61e18 --- /dev/null +++ b/releasenotes/notes/ovn-refuse-bind-dead-agent-2310f9f64c2a99de.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + OVN mechanism driver refuses to bind a port to a dead agent. \ No newline at end of file