From e4b0b9f8be165e688ee56a5063a3ccc91e823636 Mon Sep 17 00:00:00 2001 From: John Schwarz Date: Thu, 13 Oct 2016 16:10:07 +0300 Subject: [PATCH] Refactor L3 scheduler (unify code paths) This patch proposes a (rather major) refactor to the L3 scheduler. Basically, the auto_schedule_routers() code-path was split to 2 different code-paths, each dealing with a different case (unscheduled routers vs underscheduled routers), in addition to the API-initiated schedule() logic. This patch removes the 2 code-paths in favor of moving most of the logic into schedule(). While the result is a slightly longer schedule(), the benefit is that a lot of the previous unmaintainable code-paths of auto_schedule_routers() are now removed. Yay! :D Related-Bug: #1609738 Change-Id: I227ca60422545e40d3bbb8baf2b41a8ce14f4294 --- neutron/db/l3_agentschedulers_db.py | 27 ++- neutron/db/l3_hascheduler_db.py | 29 ---- neutron/scheduler/l3_agent_scheduler.py | 160 ++++++------------ neutron/tests/unit/extensions/test_l3.py | 7 +- .../unit/scheduler/test_l3_agent_scheduler.py | 117 +------------ 5 files changed, 88 insertions(+), 252 deletions(-) diff --git a/neutron/db/l3_agentschedulers_db.py b/neutron/db/l3_agentschedulers_db.py index a052e132fe4..dfe13b6ecc5 100644 --- a/neutron/db/l3_agentschedulers_db.py +++ b/neutron/db/l3_agentschedulers_db.py @@ -33,6 +33,7 @@ from neutron.common import _deprecate from neutron.common import utils as n_utils from neutron.db import agentschedulers_db from neutron.db.models import agent as agent_model +from neutron.db.models import l3 as l3_model from neutron.db.models import l3_attrs from neutron.db.models import l3agent as rb_model from neutron.extensions import l3agentscheduler @@ -399,6 +400,30 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase, return {'agents': [self._make_agent_dict(binding.l3_agent) for binding in bindings]} + def get_routers_l3_agents_count(self, context): + """Return a map between routers and agent counts for all routers.""" + + # Postgres requires every column in the select to be present in + # the group by statement when using an aggregate function. + # One solution is to generate a subquery and join it with the desired + # columns. + binding_model = rb_model.RouterL3AgentBinding + sub_query = (context.session.query( + binding_model.router_id, + func.count(binding_model.router_id).label('count')). + join(l3_attrs.RouterExtraAttributes, + binding_model.router_id == + l3_attrs.RouterExtraAttributes.router_id). + join(l3_model.Router). + group_by(binding_model.router_id).subquery()) + + query = (context.session.query(l3_model.Router, sub_query.c.count). + outerjoin(sub_query)) + + return [(self._make_router_dict(router), agent_count) if agent_count + else (self._make_router_dict(router), 0) + for router, agent_count in query] + def get_l3_agents(self, context, active=None, filters=None): query = context.session.query(agent_model.Agent) query = query.filter( @@ -469,7 +494,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase, candidates.append(l3_agent) return candidates - def auto_schedule_routers(self, context, host, router_ids): + def auto_schedule_routers(self, context, host, router_ids=None): if self.router_scheduler: self.router_scheduler.auto_schedule_routers( self, context, host, router_ids) diff --git a/neutron/db/l3_hascheduler_db.py b/neutron/db/l3_hascheduler_db.py index bbb6139a5cd..1772722f298 100644 --- a/neutron/db/l3_hascheduler_db.py +++ b/neutron/db/l3_hascheduler_db.py @@ -15,47 +15,18 @@ from neutron_lib import constants from neutron_lib.plugins import directory from sqlalchemy import func -from sqlalchemy import sql from neutron.callbacks import events from neutron.callbacks import registry from neutron.callbacks import resources from neutron.db import l3_agentschedulers_db as l3_sch_db from neutron.db.models import agent as agent_model -from neutron.db.models import l3 as l3_models -from neutron.db.models import l3_attrs from neutron.db.models import l3agent as rb_model from neutron.extensions import portbindings class L3_HA_scheduler_db_mixin(l3_sch_db.AZL3AgentSchedulerDbMixin): - def get_ha_routers_l3_agents_count(self, context): - """Return a map between HA routers and how many agents every - router is scheduled to. - """ - - # Postgres requires every column in the select to be present in - # the group by statement when using an aggregate function. - # One solution is to generate a subquery and join it with the desired - # columns. - binding_model = rb_model.RouterL3AgentBinding - sub_query = (context.session.query( - binding_model.router_id, - func.count(binding_model.router_id).label('count')). - join(l3_attrs.RouterExtraAttributes, - binding_model.router_id == - l3_attrs.RouterExtraAttributes.router_id). - join(l3_models.Router). - filter(l3_attrs.RouterExtraAttributes.ha == sql.true()). - group_by(binding_model.router_id).subquery()) - - query = (context.session.query(l3_models.Router, sub_query.c.count). - join(sub_query)) - - return [(self._make_router_dict(router), agent_count) - for router, agent_count in query] - def get_l3_agents_ordered_by_num_routers(self, context, agent_ids): if not agent_ids: return [] diff --git a/neutron/scheduler/l3_agent_scheduler.py b/neutron/scheduler/l3_agent_scheduler.py index b4f06a9631f..3c6ed52a5b2 100644 --- a/neutron/scheduler/l3_agent_scheduler.py +++ b/neutron/scheduler/l3_agent_scheduler.py @@ -23,14 +23,13 @@ from neutron_lib import constants as lib_const from oslo_config import cfg from oslo_db import exception as db_exc from oslo_log import log as logging +from oslo_log import versionutils import six -from sqlalchemy import sql from neutron._i18n import _LW from neutron.common import utils from neutron.db import api as db_api from neutron.db import l3_hamode_db -from neutron.db.models import l3 as l3_models from neutron.db.models import l3agent as rb_model from neutron.extensions import availability_zone as az_ext from neutron.extensions import l3 @@ -46,14 +45,13 @@ class L3Scheduler(object): def __init__(self): self.max_ha_agents = cfg.CONF.max_l3_agents_per_router - @abc.abstractmethod - def schedule(self, plugin, context, router_id, - candidates=None, hints=None): + def schedule(self, plugin, context, router_id, candidates=None): """Schedule the router to an active L3 agent. Schedule the router only if it is not already scheduled. """ - pass + return self._schedule_router( + plugin, context, router_id, candidates=candidates) def _router_has_binding(self, context, router_id, l3_agent_id): router_binding_model = rb_model.RouterL3AgentBinding @@ -64,52 +62,6 @@ class L3Scheduler(object): return query.count() > 0 - def _filter_unscheduled_routers(self, plugin, context, routers): - """Filter from list of routers the ones that are not scheduled.""" - unscheduled_routers = [] - for router in routers: - l3_agents = plugin.get_l3_agents_hosting_routers( - context, [router['id']]) - if l3_agents: - LOG.debug('Router %(router_id)s has already been ' - 'hosted by L3 agent %(agent_id)s', - {'router_id': router['id'], - 'agent_id': l3_agents[0]['id']}) - else: - unscheduled_routers.append(router) - return unscheduled_routers - - def _get_unscheduled_routers(self, plugin, context): - """Get routers with no agent binding.""" - # TODO(gongysh) consider the disabled agent's router - no_agent_binding = ~sql.exists().where( - l3_models.Router.id == - rb_model.RouterL3AgentBinding.router_id) - query = context.session.query( - l3_models.Router.id).filter(no_agent_binding) - unscheduled_router_ids = [router_id_[0] for router_id_ in query] - if unscheduled_router_ids: - return plugin.get_routers( - context, filters={'id': unscheduled_router_ids}) - return [] - - def _get_routers_to_schedule(self, plugin, context, router_ids=None): - """Verify that the routers specified need to be scheduled. - - :param context: the context - :param plugin: the core plugin - :param router_ids: the list of routers to be checked for scheduling - :returns: the list of routers to be scheduled - """ - if router_ids is not None: - filters = {'id': router_ids} - routers = plugin.get_routers(context, filters=filters) - result = self._filter_unscheduled_routers(plugin, context, routers) - else: - result = self._get_unscheduled_routers(plugin, context) - return [r for r in result - if plugin.router_supports_scheduling(context, r['id'])] - def _get_routers_can_schedule(self, plugin, context, routers, l3_agent): """Get the subset of routers that can be scheduled on the L3 agent.""" ids_to_discard = set() @@ -122,42 +74,59 @@ class L3Scheduler(object): return [r for r in routers if r['id'] not in ids_to_discard] - def auto_schedule_routers(self, plugin, context, host, router_ids): - """Schedule non-hosted routers to L3 Agent running on host. + def auto_schedule_routers(self, plugin, context, host, router_ids=None): + """Schedule under-scheduled routers to L3 Agents. - If router_ids is given, each router in router_ids is scheduled - if it is not scheduled yet. Otherwise all unscheduled routers - are scheduled. - Do not schedule the routers which are hosted already - by active l3 agents. + An under-scheduled router is a router that is either completely + un-scheduled (scheduled to 0 agents), or an HA router that is + under-scheduled (scheduled to less than max_l3_agents configuration + option. The function finds all the under-scheduled routers and + schedules them. - :returns: True if routers have been successfully assigned to host + :param host: if unspecified, under-scheduled routers are scheduled to + all agents (not necessarily from the requesting host). If + specified, under-scheduled routers are scheduled only to + the agent on 'host'. + :param router_ids: currently unused and deprecated. + kept for backward compatibility. """ + if router_ids is not None: + versionutils.report_deprecated_feature( + LOG, + _LW('Passing router_ids has no effect on L3 agent ' + 'scheduling. This is deprecated and will be ' + 'removed in the Queens release.')) + l3_agent = plugin.get_enabled_agent_on_host( context, lib_const.AGENT_TYPE_L3, host) if not l3_agent: return - unscheduled_routers = self._get_routers_to_schedule( - plugin, context, router_ids) - if not unscheduled_routers: - if utils.is_extension_supported( - plugin, lib_const.L3_HA_MODE_EXT_ALIAS): - self._schedule_ha_routers_to_additional_agent( - plugin, context, l3_agent) - return - + underscheduled_routers = self._get_underscheduled_routers( + plugin, context) target_routers = self._get_routers_can_schedule( - plugin, context, unscheduled_routers, l3_agent) - if not target_routers: - LOG.warning(_LW('No routers compatible with L3 agent ' - 'configuration on host %s'), host) - return + plugin, context, underscheduled_routers, l3_agent) - self._bind_routers(plugin, context, target_routers, l3_agent) + for router in target_routers: + self.schedule(plugin, context, router['id'], candidates=[l3_agent]) + + def _get_underscheduled_routers(self, plugin, context): + underscheduled_routers = [] + max_agents_for_ha = plugin.get_number_of_agents_for_scheduling(context) + + for router, count in plugin.get_routers_l3_agents_count(context): + if (count < 1 or + router.get('ha', False) and count < max_agents_for_ha): + # Either the router was un-scheduled (scheduled to 0 agents), + # or it's an HA router and it was under-scheduled (scheduled to + # less than max_agents_for_ha). Either way, it should be added + # to the list of routers we want to handle. + underscheduled_routers.append(router) + return underscheduled_routers def _get_candidates(self, plugin, context, sync_router): """Return L3 agents where a router could be scheduled.""" + is_ha = sync_router.get('ha', False) with context.session.begin(subtransactions=True): # allow one router is hosted by just # one enabled l3 agent hosting since active is just a @@ -165,7 +134,7 @@ class L3Scheduler(object): # active any time current_l3_agents = plugin.get_l3_agents_hosting_routers( context, [sync_router['id']], admin_state_up=True) - if current_l3_agents: + if current_l3_agents and not is_ha: LOG.debug('Router %(router_id)s has already been hosted ' 'by L3 agent %(agent_id)s', {'router_id': sync_router['id'], @@ -343,34 +312,17 @@ class L3Scheduler(object): """Return a mapping (router, # agents) matching specified filters.""" return plugin.get_ha_routers_l3_agents_count(context) - def _schedule_ha_routers_to_additional_agent(self, plugin, context, agent): - """Bind already scheduled routers to the agent. - - Retrieve the number of agents per router and check if the router has - to be scheduled on the given agent if max_l3_agents_per_router - is not yet reached. - """ - - routers_agents = self.get_ha_routers_l3_agents_counts(plugin, context, - agent) - admin_ctx = context.elevated() - underscheduled_routers = [router for router, agents in routers_agents - if (not self.max_ha_agents or - agents < self.max_ha_agents)] - schedulable_routers = self._get_routers_can_schedule( - plugin, admin_ctx, underscheduled_routers, agent) - for router in schedulable_routers: - if not self._router_has_binding(admin_ctx, router['id'], - agent.id): - self.create_ha_port_and_bind(plugin, admin_ctx, - router['id'], - router['tenant_id'], - agent) + def _filter_scheduled_agents(self, plugin, context, router_id, candidates): + hosting = plugin.get_l3_agents_hosting_routers(context, [router_id]) + return list(set(candidates) - set(hosting)) def _bind_ha_router(self, plugin, context, router_id, tenant_id, candidates): """Bind a HA router to agents based on a specific policy.""" + candidates = self._filter_scheduled_agents(plugin, context, router_id, + candidates) + chosen_agents = self._choose_router_agents_for_ha( plugin, context, candidates) @@ -384,11 +336,6 @@ class L3Scheduler(object): class ChanceScheduler(L3Scheduler): """Randomly allocate an L3 agent for a router.""" - def schedule(self, plugin, context, router_id, - candidates=None): - return self._schedule_router( - plugin, context, router_id, candidates=candidates) - def _choose_router_agent(self, plugin, context, candidates): return random.choice(candidates) @@ -400,11 +347,6 @@ class ChanceScheduler(L3Scheduler): class LeastRoutersScheduler(L3Scheduler): """Allocate to an L3 agent with the least number of routers bound.""" - def schedule(self, plugin, context, router_id, - candidates=None): - return self._schedule_router( - plugin, context, router_id, candidates=candidates) - def _choose_router_agent(self, plugin, context, candidates): candidate_ids = [candidate['id'] for candidate in candidates] chosen_agent = plugin.get_l3_agent_with_min_routers( diff --git a/neutron/tests/unit/extensions/test_l3.py b/neutron/tests/unit/extensions/test_l3.py index 95c131d2bf6..de2cd3a1aef 100644 --- a/neutron/tests/unit/extensions/test_l3.py +++ b/neutron/tests/unit/extensions/test_l3.py @@ -46,6 +46,7 @@ from neutron.db import l3_attrs_db from neutron.db import l3_db from neutron.db import l3_dvr_db from neutron.db import l3_dvrscheduler_db +from neutron.db import l3_hamode_db from neutron.db.models import l3 as l3_models from neutron.db import models_v2 from neutron.extensions import dns @@ -279,7 +280,8 @@ class TestL3NatIntPlugin(TestL3NatBasePlugin, # scheduling. class TestL3NatIntAgentSchedulingPlugin(TestL3NatIntPlugin, l3_agentschedulers_db. - L3AgentSchedulerDbMixin): + L3AgentSchedulerDbMixin, + l3_hamode_db.L3_HA_NAT_db_mixin): supported_extension_aliases = ["external-net", "router", "l3_agent_scheduler"] @@ -316,7 +318,8 @@ class TestL3NatServicePlugin(common_db_mixin.CommonDbMixin, # plugins that delegate away L3 routing functionality class TestL3NatAgentSchedulingServicePlugin(TestL3NatServicePlugin, l3_dvrscheduler_db. - L3_DVRsch_db_mixin): + L3_DVRsch_db_mixin, + l3_hamode_db.L3_HA_NAT_db_mixin): supported_extension_aliases = ["router", "l3_agent_scheduler"] diff --git a/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py b/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py index e354b36867f..d4403d090a2 100644 --- a/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py +++ b/neutron/tests/unit/scheduler/test_l3_agent_scheduler.py @@ -132,118 +132,12 @@ class L3SchedulerBaseTestCase(base.BaseTestCase): self.scheduler = FakeL3Scheduler() self.plugin = mock.Mock() - def test_auto_schedule_routers(self): - self.plugin.get_enabled_agent_on_host.return_value = [mock.ANY] - with mock.patch.object(self.scheduler, - '_get_routers_to_schedule') as gs,\ - mock.patch.object(self.scheduler, - '_get_routers_can_schedule',) as gr,\ - mock.patch.object(self.scheduler, - '_bind_routers') as gb: - self.scheduler.auto_schedule_routers( - self.plugin, mock.ANY, mock.ANY, mock.ANY) - self.assertTrue(self.plugin.get_enabled_agent_on_host.called) - self.assertTrue(gs.called) - self.assertTrue(gr.called) - self.assertTrue(gb.called) - - def test_auto_schedule_routers_no_agents(self): - self.plugin.get_enabled_agent_on_host.return_value = None - result = self.scheduler.auto_schedule_routers( - self.plugin, mock.ANY, mock.ANY, mock.ANY) - self.assertTrue(self.plugin.get_enabled_agent_on_host.called) - self.assertFalse(result) - - def test_auto_schedule_routers_no_unscheduled_routers(self): - type(self.plugin).supported_extension_aliases = ( - mock.PropertyMock(return_value=[])) - with mock.patch.object(self.scheduler, - '_get_routers_to_schedule') as mock_routers: - mock_routers.return_value = [] - result = self.scheduler.auto_schedule_routers( - self.plugin, mock.ANY, mock.ANY, mock.ANY) - self.assertTrue(self.plugin.get_enabled_agent_on_host.called) - self.assertFalse(result) - - def test_auto_schedule_routers_no_target_routers(self): - self.plugin.get_enabled_agent_on_host.return_value = [mock.ANY] - with mock.patch.object( - self.scheduler, - '_get_routers_to_schedule') as mock_unscheduled_routers,\ - mock.patch.object( - self.scheduler, - '_get_routers_can_schedule') as mock_target_routers: - mock_unscheduled_routers.return_value = mock.ANY - mock_target_routers.return_value = [] - result = self.scheduler.auto_schedule_routers( - self.plugin, mock.ANY, mock.ANY, mock.ANY) - self.assertTrue(self.plugin.get_enabled_agent_on_host.called) - self.assertFalse(result) - - def test__get_routers_to_schedule_with_router_ids(self): - router_ids = ['foo_router_1', 'foo_router_2'] - expected_routers = [ - {'id': 'foo_router1'}, {'id': 'foo_router_2'} - ] - self.plugin.get_routers.return_value = expected_routers - with mock.patch.object(self.scheduler, - '_filter_unscheduled_routers') as mock_filter: - mock_filter.return_value = expected_routers - unscheduled_routers = self.scheduler._get_routers_to_schedule( - self.plugin, mock.ANY, router_ids) - mock_filter.assert_called_once_with( - mock.ANY, self.plugin, expected_routers) - self.assertEqual(expected_routers, unscheduled_routers) - - def test__get_routers_to_schedule_without_router_ids(self): - expected_routers = [ - {'id': 'foo_router1'}, {'id': 'foo_router_2'} - ] - with mock.patch.object(self.scheduler, - '_get_unscheduled_routers') as mock_get: - mock_get.return_value = expected_routers - unscheduled_routers = self.scheduler._get_routers_to_schedule( - self.plugin, mock.ANY) - mock_get.assert_called_once_with(mock.ANY, self.plugin) - self.assertEqual(expected_routers, unscheduled_routers) - - def test__get_routers_to_schedule_excludes_unsupported(self): - routers = [ - {'id': 'router_1'}, {'id': 'router_2'}, {'id': 'router_3'} - ] - expected_routers = [{'id': 'router_2'}] - # exclude everything except for 2 - self.plugin.router_supports_scheduling = lambda c, rid: rid[-1] == '2' - with mock.patch.object(self.scheduler, - '_get_unscheduled_routers') as mock_get: - mock_get.return_value = routers - unscheduled_routers = self.scheduler._get_routers_to_schedule( - self.plugin, mock.ANY) - mock_get.assert_called_once_with(mock.ANY, self.plugin) - self.assertEqual(expected_routers, unscheduled_routers) - def _test__get_routers_can_schedule(self, routers, agent, target_routers): self.plugin.get_l3_agent_candidates.return_value = agent result = self.scheduler._get_routers_can_schedule( self.plugin, mock.ANY, routers, mock.ANY) self.assertEqual(target_routers, result) - def _test__filter_unscheduled_routers(self, routers, agents, expected): - self.plugin.get_l3_agents_hosting_routers.return_value = agents - unscheduled_routers = self.scheduler._filter_unscheduled_routers( - self.plugin, mock.ANY, routers) - self.assertEqual(expected, unscheduled_routers) - - def test__filter_unscheduled_routers_already_scheduled(self): - self._test__filter_unscheduled_routers( - [{'id': 'foo_router1'}, {'id': 'foo_router_2'}], - [{'id': 'foo_agent_id'}], []) - - def test__filter_unscheduled_routers_non_scheduled(self): - self._test__filter_unscheduled_routers( - [{'id': 'foo_router1'}, {'id': 'foo_router_2'}], - None, [{'id': 'foo_router1'}, {'id': 'foo_router_2'}]) - def test__get_routers_can_schedule_with_compat_agent(self): routers = [{'id': 'foo_router'}] self._test__get_routers_can_schedule(routers, mock.ANY, routers) @@ -1592,17 +1486,18 @@ class L3_HA_scheduler_db_mixinTestCase(L3HATestCaseMixin): self.agent4 = helpers.register_l3_agent(host='host_4') self.agent_id4 = self.agent4.id - def test_get_ha_routers_l3_agents_count(self): + def test_get_routers_l3_agents_count(self): router1 = self._create_ha_router() + cfg.CONF.set_override('max_l3_agents_per_router', 2) router2 = self._create_ha_router() router3 = self._create_ha_router(ha=False) - result = self.plugin.get_ha_routers_l3_agents_count(self.adminContext) + result = self.plugin.get_routers_l3_agents_count(self.adminContext) - self.assertEqual(2, len(result)) + self.assertEqual(3, len(result)) check_result = [(router['id'], agents) for router, agents in result] self.assertIn((router1['id'], 4), check_result) - self.assertIn((router2['id'], 4), check_result) - self.assertNotIn((router3['id'], mock.ANY), check_result) + self.assertIn((router2['id'], 2), check_result) + self.assertIn((router3['id'], 0), check_result) def test_get_ordered_l3_agents_by_num_routers(self): # Mock scheduling so that the test can control it explicitly