From e09d4d6dd0f6570400d28531e56d2166141357cb Mon Sep 17 00:00:00 2001 From: Daniel Alvarez Date: Mon, 15 Jun 2020 17:17:53 +0200 Subject: [PATCH] [OVN] Avoid unnecessary DB writes during agent liveness check As stated in the bug description, there are many writes of the agent liveness external_ids into the Chassis table. There was a protection to avoid bumping nb_cfg too frequently. The same protection is reused to avoid writing into the Chassis external_ids. This patch reduces the number of transactions to the SB database and, therefore, the recomputations that it causes to ovn-controller in all nodes. Change-Id: I5db90fde8e7394772ec23c6384c711096c246621 Closes-Bug: #1883554 Signed-off-by: Daniel Alvarez --- .../drivers/ovn/mech_driver/mech_driver.py | 33 +++++++++++-------- .../ovn/mech_driver/test_mech_driver.py | 31 +++++++++++++---- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py index c9a73274c8c..753677a954b 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py @@ -1054,7 +1054,7 @@ class OVNMechanismDriver(api.MechanismDriver): " networking-ovn-metadata-agent status/logs.", port_id) - def agent_alive(self, chassis, type_): + def agent_alive(self, chassis, type_, update_db): nb_cfg = chassis.nb_cfg key = ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY if type_ == ovn_const.OVN_METADATA_AGENT: @@ -1066,15 +1066,17 @@ class OVNMechanismDriver(api.MechanismDriver): updated_at = timeutils.parse_isotime(chassis.external_ids[key]) except KeyError: updated_at = timeutils.utcnow(with_timezone=True) + update_db = True # Allow a maximum of 1 difference between expected and read values # to avoid false positives. if self._nb_ovn.nb_global.nb_cfg - nb_cfg <= 1: - # update the time of our successful check - value = timeutils.utcnow(with_timezone=True).isoformat() - self._sb_ovn.db_set('Chassis', chassis.uuid, - ('external_ids', {key: value})).execute( - check_error=True) + if update_db: + # Update the time of our successful check + value = timeutils.utcnow(with_timezone=True).isoformat() + self._sb_ovn.db_set('Chassis', chassis.uuid, + ('external_ids', {key: value})).execute( + check_error=True) return True now = timeutils.utcnow(with_timezone=True) @@ -1102,7 +1104,7 @@ class OVNMechanismDriver(api.MechanismDriver): 'alive': alive, 'admin_state_up': True} - def agents_from_chassis(self, chassis): + def agents_from_chassis(self, chassis, update_db=True): agent_dict = {} # Check for ovn-controller / ovn-controller gateway @@ -1113,7 +1115,7 @@ class OVNMechanismDriver(api.MechanismDriver): chassis.external_ids.get('ovn-cms-options', [])): agent_type = ovn_const.OVN_CONTROLLER_GW_AGENT - alive = self.agent_alive(chassis, agent_type) + alive = self.agent_alive(chassis, agent_type, update_db) description = chassis.external_ids.get( ovn_const.OVN_AGENT_DESC_KEY, '') agent_dict[agent_id] = self._format_agent_info( @@ -1125,7 +1127,7 @@ class OVNMechanismDriver(api.MechanismDriver): ovn_const.OVN_AGENT_METADATA_ID_KEY) if metadata_agent_id: agent_type = ovn_const.OVN_METADATA_AGENT - alive = self.agent_alive(chassis, agent_type) + alive = self.agent_alive(chassis, agent_type, update_db) description = chassis.external_ids.get( ovn_const.OVN_AGENT_METADATA_DESC_KEY, '') agent_dict[metadata_agent_id] = self._format_agent_info( @@ -1159,7 +1161,11 @@ class OVNMechanismDriver(api.MechanismDriver): setattr(self._plugin, method_name, types.MethodType(fn, self._plugin)) def ping_chassis(self): - """Update NB_Global.nb_cfg so that Chassis.nb_cfg will increment""" + """Update NB_Global.nb_cfg so that Chassis.nb_cfg will increment + + :returns: (bool) True if nb_cfg was updated. False if it was updated + recently and this call didn't trigger any update. + """ last_ping = self._nb_ovn.nb_global.external_ids.get( ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY) if last_ping: @@ -1167,11 +1173,12 @@ class OVNMechanismDriver(api.MechanismDriver): next_ping = (timeutils.parse_isotime(last_ping) + datetime.timedelta(seconds=interval)) if timeutils.utcnow(with_timezone=True) < next_ping: - return + return False with self._nb_ovn.create_transaction(check_error=True, bump_nb_cfg=True) as txn: txn.add(self._nb_ovn.check_liveness()) + return True def populate_agents(driver): @@ -1182,12 +1189,12 @@ def populate_agents(driver): def get_agents(self, context, filters=None, fields=None, _driver=None): - _driver.ping_chassis() + update_db = _driver.ping_chassis() filters = filters or {} agent_list = [] populate_agents(_driver) for ch in AGENTS.values(): - for agent in _driver.agents_from_chassis(ch).values(): + for agent in _driver.agents_from_chassis(ch, update_db).values(): if all(agent[k] in v for k, v in filters.items()): agent_list.append(agent) return agent_list diff --git a/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py b/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py index 08ca171a4a5..2579e5a2437 100644 --- a/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py +++ b/neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py @@ -1543,7 +1543,10 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase): ovn_const.OVN_METADATA_AGENT): self.mech_driver._nb_ovn.nb_global.nb_cfg = 5 chassis = self._add_chassis_agent(5, agent_type) - self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type)) + self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type, + update_db=True)) + # Assert that each Chassis has been updated in the SB database + self.assertEqual(2, self.sb_ovn.db_set.call_count) def test_agent_alive_true_one_diff(self): # Agent should be reported as alive when the nb_cfg delta is 1 @@ -1554,14 +1557,16 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase): now = timeutils.utcnow() updated_at = now - datetime.timedelta(cfg.CONF.agent_down_time + 1) chassis = self._add_chassis_agent(4, agent_type, updated_at) - self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type)) + self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type, + update_db=True)) def test_agent_alive_not_timed_out(self): for agent_type in (ovn_const.OVN_CONTROLLER_AGENT, ovn_const.OVN_METADATA_AGENT): self.mech_driver._nb_ovn.nb_global.nb_cfg = 5 chassis = self._add_chassis_agent(3, agent_type) - self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type), + self.assertTrue(self.mech_driver.agent_alive( + chassis, agent_type, update_db=True), "Agent type %s is not alive" % agent_type) def test_agent_alive_timed_out(self): @@ -1571,7 +1576,17 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase): now = timeutils.utcnow() updated_at = now - datetime.timedelta(cfg.CONF.agent_down_time + 1) chassis = self._add_chassis_agent(3, agent_type, updated_at) - self.assertFalse(self.mech_driver.agent_alive(chassis, agent_type)) + self.assertFalse(self.mech_driver.agent_alive(chassis, agent_type, + update_db=True)) + + def test_agent_alive_true_skip_db_update(self): + for agent_type in (ovn_const.OVN_CONTROLLER_AGENT, + ovn_const.OVN_METADATA_AGENT): + self.mech_driver._nb_ovn.nb_global.nb_cfg = 5 + chassis = self._add_chassis_agent(5, agent_type) + self.assertTrue(self.mech_driver.agent_alive(chassis, agent_type, + update_db=False)) + self.sb_ovn.db_set.assert_not_called() def _test__update_dnat_entry_if_needed(self, up=True): ovn_conf.cfg.CONF.set_override( @@ -1656,10 +1671,12 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase): self.nb_ovn.nb_global.external_ids = { ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY: str(time)} + update_db = self.mech_driver.ping_chassis() # Since the interval has expired, assert that the "check_liveness" # command has been invoked - self.mech_driver.ping_chassis() self.nb_ovn.check_liveness.assert_called_once_with() + # Assert that ping_chassis returned True as it updated the db + self.assertTrue(update_db) def test_ping_chassis_interval_not_expired(self): ovn_conf.cfg.CONF.set_override('agent_down_time', 10) @@ -1669,9 +1686,11 @@ class TestOVNMechanismDriver(test_plugin.Ml2PluginV2TestCase): self.nb_ovn.nb_global.external_ids = { ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY: str(time)} + update_db = self.mech_driver.ping_chassis() # Assert that "check_liveness" wasn't invoked - self.mech_driver.ping_chassis() self.assertFalse(self.nb_ovn.check_liveness.called) + # Assert that ping_chassis returned False as it didn't update the db + self.assertFalse(update_db) class OVNMechanismDriverTestCase(test_plugin.Ml2PluginV2TestCase):